In [62]:
import requests
import pandas as pd
import numpy as np
from concurrent.futures import ThreadPoolExecutor

### SEC.GOV Form Filings Imported Directly

#### Example Query

https://www.sec.gov/cgi-bin/browse-edgar?company=&CIK=&type=4owner=include&count=40&action=getcurrent

#### Query String Parameters

* **company:** (Exact name -- i.e. MANGOCEUTICALS, INC.)
* **CIK:** Central Index Key (Can be cross referenced from Ticker)
* **type:** *4*
* **owner:** *include*
* **count:** 20, 40, 60, 100 (Max?)
* **action:** *getcurrent*

#### Full REST Request Info

- **:method:** GET
- **:scheme:** https
- **:authority:** www.sec.gov
- **:path:** /cgi-bin/browse-edgar?company=Apple%2C+Inc&CIK=&type=4&owner=include&count=100&action=getcurrent
- **Accept:** text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
- **Accept-Encoding:** gzip, deflate, br
- **Accept-Language:** en-US,en;q=0.9
- **Connection:** keep-alive
- **Host:** www.sec.gov
- **Referer:** https://www.sec.gov/cgi-bin/browse-edgar?company=&CIK=&type=4&owner=include&count=40&action=getcurrent
- **Sec-Fetch-Dest:** document
- **Sec-Fetch-Mode:** navigate
- **Sec-Fetch-Site:** same-origin

In [17]:
filings_url = 'https://www.sec.gov/cgi-bin/browse-edgar?'
header = {'User-Agent': 'XtraByte Consulting, Inc., bkowalczyk@xtrabyteconsulting.com',
          'Accept': 'application/json',
          'Accept-Encoding': 'gzip, deflate'}
params = {'company' : '',
          'CIK' : '',
          'type' : '4',
          'owner' : 'include',
          'count' : '10',
          'action' : 'getcurrent',}

latest_filings = requests.get(filings_url, params=params, headers=header)

In [18]:
latest_filings

b'\n<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">\n<html lang="ENG">\n\n<head>\n<title>Latest EDGAR Filings</title>\n<!-- BEGIN HEADER -->\n<script language="JavaScript" src="/include/sec.js" type="text/javascript"></script>\n<link rel="alternate" type="application/atom+xml" title="ATOM" href="/cgi-bin/browse-edgar?action=getcurrent&amp;type=4&amp;company=&amp;dateb=&amp;owner=include&amp;count=10&amp;output=atom" />\n\n</head>\n\n<body style="margin: 0">\n<!-- SEC Web Analytics - For information please visit: https://www.sec.gov/privacy.htm#collectedinfo -->\n<noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-TD3BKV"\nheight="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>\n<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({\'gtm.start\':\nnew Date().getTime(),event:\'gtm.js\'});var f=d.getElementsByTagName(s)[0],\nj=d.createElement(s),dl=l!=\'dataLayer\'?\'&l=\'+l:\'\';j.async=true;j.src=\n\'//www.googletagmanager.com/gtm.js?

In [19]:
from bs4 import BeautifulSoup

soup = BeautifulSoup(latest_filings.content, 'html.parser')

In [20]:
print(soup.contents)

['\n', 'HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"', '\n', <html lang="ENG">
<head>
<title>Latest EDGAR Filings</title>
<!-- BEGIN HEADER -->
<script language="JavaScript" src="/include/sec.js" type="text/javascript"></script>
<link href="/cgi-bin/browse-edgar?action=getcurrent&amp;type=4&amp;company=&amp;dateb=&amp;owner=include&amp;count=10&amp;output=atom" rel="alternate" title="ATOM" type="application/atom+xml"/>
</head>
<body style="margin: 0">
<!-- SEC Web Analytics - For information please visit: https://www.sec.gov/privacy.htm#collectedinfo -->
<noscript><iframe height="0" src="//www.googletagmanager.com/ns.html?id=GTM-TD3BKV" style="display:none;visibility:hidden" width="0"></iframe></noscript>
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
'//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBef

In [21]:
tables = soup.find_all('table')

print(len(tables))

8


In [7]:
SEC_DOT_GOV = 'https://www.sec.gov'
def download_archived_report(location: str):
  header = {'User-Agent': 'XtraByte Consulting, Inc., bkowalczyk@xtrabyteconsulting.com',
          'Accept': 'application/json',
          'Accept-Encoding': 'gzip, deflate'}
  response = requests.get(SEC_DOT_GOV + location, headers=header)
  if response.status_code == 200:
    with open(location.split('/')[-1], 'wb') as f:
      f.write(response.content)
    return response.content.decode('utf-8')
  else:
    return None

In [9]:
report = download_archived_report('/Archives/edgar/data/350698/000095017024004354/0000950170-24-004354.txt')

In [10]:
text = report.decode('utf-8')
print(text)

<SEC-DOCUMENT>0000950170-24-004354.txt : 20240112
<SEC-HEADER>0000950170-24-004354.hdr.sgml : 20240112
<ACCEPTANCE-DATETIME>20240112213007
ACCESSION NUMBER:		0000950170-24-004354
CONFORMED SUBMISSION TYPE:	4
PUBLIC DOCUMENT COUNT:		1
CONFORMED PERIOD OF REPORT:	20240110
FILED AS OF DATE:		20240112
DATE AS OF CHANGE:		20240112

REPORTING-OWNER:	

	OWNER DATA:	
		COMPANY CONFORMED NAME:			LAMPERT EDWARD S
		CENTRAL INDEX KEY:			0001183200
		ORGANIZATION NAME:           	

	FILING VALUES:
		FORM TYPE:		4
		SEC ACT:		1934 Act
		SEC FILE NUMBER:	001-13107
		FILM NUMBER:		24533437

	MAIL ADDRESS:	
		STREET 1:		1170 KANE CONCOURSE
		STREET 2:		SUITE 200
		CITY:			BAY HARBOUR
		STATE:			FL
		ZIP:			33154

ISSUER:		

	COMPANY DATA:	
		COMPANY CONFORMED NAME:			AUTONATION, INC.
		CENTRAL INDEX KEY:			0000350698
		STANDARD INDUSTRIAL CLASSIFICATION:	RETAIL-AUTO DEALERS & GASOLINE STATIONS [5500]
		ORGANIZATION NAME:           	07 Trade & Services
		IRS NUMBER:				731105145
		STATE OF INCORPORATIO

In [1]:
import xml.etree.ElementTree as ET
import xml.parsers.expat as expat
import xml.dom.minidom as minidom
import xml.sax as sax
import xml.etree.ElementPath as EP


In [None]:

def extract_header(text: str, delimeter: str = '</SEC-HEADER>'):
  header = text.split(delimeter)[0] + '</SEC-HEADER>'
  body = text.split(delimeter)[1]
  return header, body


In [44]:

with open('outputs/archived-form4-file.txt', 'r') as f:
  text = f.read()
  header, body, header_closing_tags = text.split('XML>\n')
  header = header + header_closing_tags
  body = body.removesuffix('</')
  print(body[-10:])
  root = ET.fromstring(body)
  print(root.tag)
  print(root.attrib)
  


Document>

ownershipDocument
{}


In [46]:
for child in root:
  print(child.tag, child.attrib, child.text, child.tail, child.items())

schemaVersion {} X0508 

     []
documentType {} 4 

     []
periodOfReport {} 2024-01-10 

     []
issuer {} 
         

     []
reportingOwner {} 
         

     []
aff10b5One {} false 

     []
nonDerivativeTable {} 
         

     []
footnotes {} 
         

     []
ownerSignature {} 
         
 []


In [49]:
def xml_tree_to_dict1(tree):
  return {tree.tag : {tree.attrib : [xml_tree_to_dict1(child) for child in tree]}}

def xml_tree_to_dict(tree):
  return {tree.tag :  map(xml_tree_to_dict, tree.iterchildren()) or tree.text}

In [51]:
import xmltodict

dict_insider_ = xmltodict.parse(body)

In [60]:
print(dict_insider_['ownershipDocument']['footnotes']['footnote'])

{'@id': 'F1', '#text': 'Includes 1,649 shares of common stock of AutoNation, Inc. (the "Issuer"), par value $0.01 per share (each, a "Share"), held by The Nicholas Floyd Lampert 2015 Trust ("The Nicholas Trust"), and 1,649 Shares held by The Nina Rose Lampert 2015 Trust (the "The Nina Trust" and, together with The Nicholas Trust, the "Trusts"). The reporting person may be deemed to have beneficial ownership of securities owned by the Trusts. The reporting person disclaims beneficial ownership of the securities owned by the Trusts, except to the extent of the pecuniary interest of the reporting person in such securities. The reporting person states that neither the filing of this statement nor anything herein shall be deemed an admission that such person is, for purposes of Section 16 of the Securities Exchange Act of 1934, as amended or otherwise, the beneficial owner of any securities owned by the Trusts.'}


In [64]:
inside_her_ = pd.json_normalize(dict_insider_, sep='_')
flat_inside_her_ = inside_her_.to_dict(orient='records')[0]
flat_inside_her_

{'ownershipDocument_schemaVersion': 'X0508',
 'ownershipDocument_documentType': '4',
 'ownershipDocument_periodOfReport': '2024-01-10',
 'ownershipDocument_issuer_issuerCik': '0000350698',
 'ownershipDocument_issuer_issuerName': 'AUTONATION, INC.',
 'ownershipDocument_issuer_issuerTradingSymbol': 'AN',
 'ownershipDocument_reportingOwner_reportingOwnerId_rptOwnerCik': '0001183200',
 'ownershipDocument_reportingOwner_reportingOwnerId_rptOwnerName': 'LAMPERT EDWARD S',
 'ownershipDocument_reportingOwner_reportingOwnerAddress_rptOwnerStreet1': '1170 KANE CONCOURSE, SUITE 200',
 'ownershipDocument_reportingOwner_reportingOwnerAddress_rptOwnerStreet2': None,
 'ownershipDocument_reportingOwner_reportingOwnerAddress_rptOwnerCity': 'BAY HARBOR ISLANDS',
 'ownershipDocument_reportingOwner_reportingOwnerAddress_rptOwnerState': 'FL',
 'ownershipDocument_reportingOwner_reportingOwnerAddress_rptOwnerZipCode': '33154',
 'ownershipDocument_reportingOwner_reportingOwnerAddress_rptOwnerStateDescription'

In [65]:
inside_her_

Unnamed: 0,ownershipDocument_schemaVersion,ownershipDocument_documentType,ownershipDocument_periodOfReport,ownershipDocument_issuer_issuerCik,ownershipDocument_issuer_issuerName,ownershipDocument_issuer_issuerTradingSymbol,ownershipDocument_reportingOwner_reportingOwnerId_rptOwnerCik,ownershipDocument_reportingOwner_reportingOwnerId_rptOwnerName,ownershipDocument_reportingOwner_reportingOwnerAddress_rptOwnerStreet1,ownershipDocument_reportingOwner_reportingOwnerAddress_rptOwnerStreet2,...,ownershipDocument_nonDerivativeTable_nonDerivativeTransaction_transactionAmounts_transactionShares_value,ownershipDocument_nonDerivativeTable_nonDerivativeTransaction_transactionAmounts_transactionPricePerShare_value,ownershipDocument_nonDerivativeTable_nonDerivativeTransaction_transactionAmounts_transactionAcquiredDisposedCode_value,ownershipDocument_nonDerivativeTable_nonDerivativeTransaction_postTransactionAmounts_sharesOwnedFollowingTransaction_value,ownershipDocument_nonDerivativeTable_nonDerivativeTransaction_ownershipNature_directOrIndirectOwnership_value,ownershipDocument_nonDerivativeTable_nonDerivativeTransaction_ownershipNature_directOrIndirectOwnership_footnoteId_@id,ownershipDocument_footnotes_footnote_@id,ownershipDocument_footnotes_footnote_#text,ownershipDocument_ownerSignature_signatureName,ownershipDocument_ownerSignature_signatureDate
0,X0508,4,2024-01-10,350698,"AUTONATION, INC.",AN,1183200,LAMPERT EDWARD S,"1170 KANE CONCOURSE, SUITE 200",,...,77284,142.5801,D,4659161,D,F1,F1,"Includes 1,649 shares of common stock of AutoN...","EDWARD S. LAMPERT, By:\t/s/ Edward S. Lampert",2024-01-12


In [81]:
filings_url = 'https://www.sec.gov/edgar/search/#/'
header = {'User-Agent': 'XtraByte Con/sulting, Inc., bkowalczyk@xtrabyteconsulting.com',
          'Accept': 'appliatcionjson',
          'Accept-Encoding': 'gzip, deflate'}
params = {'dataRange' : 'all',
          'category' : 'custom',
          'ciks' : f'{flat_inside_her_["ownershipDocument_reportingOwner_reportingOwnerId_rptOwnerCik"]}',
          'entityName' : f'{flat_inside_her_["ownershipDocument_reportingOwner_reportingOwnerId_rptOwnerName"]}',
          'forms' : '4',}
edward_filings = requests.get(filings_url, params=params, headers=header)
edward_filings.content



In [None]:
https://www.sec.gov/edgar/search/#/dateRange=all&category=custom&ciks=0001183200&entityName=LAMPERT%2520EDWARD%2520S%2520(CIK%25200001183200)&forms=4

In [84]:
ed_filings_url = 'https://www.sec.gov/submissions/CIK0001183200.json'
ed_filings = requests.get(ed_filings_url)
ed_filings.content


b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n<html xmlns="http://www.w3.org/1999/xhtml">\n<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n<title>SEC.gov | Request Rate Threshold Exceeded</title>\n<style>\nhtml {height: 100%}\nbody {height: 100%; margin:0; padding:0;}\n#header {background-color:#003968; color:#fff; padding:15px 20px 10px 20px;font-family:Arial, Helvetica, sans-serif; font-size:20px; border-bottom:solid 5px #000;}\n#footer {background-color:#003968; color:#fff; padding:15px 20px;font-family:Arial, Helvetica, sans-serif; font-size:20px;}\n#content {max-width:650px;margin:60px auto; padding:0 20px 100px 20px; background-image:url(seal_bw.png);background-repeat:no-repeat;background-position:50% 100%;}\nh1 {font-family:Georgia, Times, serif; font-size:20px;}\nh2 {text-align:center; font-family:Georgia, Times, serif; font-size:20px; width:100%; border-bottom:solid #9

In [None]:
ed_json = ed_filings.json()
ed_json

In [None]:
insiders_df = pd.DataFrame(columns=['schema', 'report_date', 'issuer_cik', 'issuer_name',
                                    'issuer_ticker', 'insider_cik', 'insider_name', 
                                    'insider_street', 'insider_city', 'insider_state', 
                                    'insider_zip', 'director', 'officer', 
                                    'ten_percent_owner', 'other', 'transaction_type', 
                                    'security_name', 'transaction_date', 'transaction_code', 
                                    'equity_sw5ap', 'transaction_shares', 'price_per_share', 'acquired_disposed', 
                                    'shares_owned_following',  'direct_indirect', 'footnode_id', 'footnote', 
                                    'owner_name', 'signature_date'])


In [None]:
import xml.etree.ElementTree as ET

# Assuming xml_string contains your XML data
xml_string = text

# Parse the XML
root = ET.fromstring(xml_string)

# Navigate to the ownershipDocument element
ownership_document = root.find('.//ownershipDocument')

# Dictionary to store the extracted data
ownership_data = {}

# Extracting data
if ownership_document is not None:
    for child in ownership_document:
        # Assuming you want to store text of each child element in the dictionary
        ownership_data[child.tag] = child.text.strip() if child.text else None

        # If there are further nested elements, you can iterate through them as well
        # For example, if there are child elements in 'issuer':
        if child.tag == 'issuer':
            issuer_data = {}
            for subchild in child:
                issuer_data[subchild.tag] = subchild.text.strip() if subchild.text else None
            ownership_data['issuer'] = issuer_data

# ownership_data now contains the extracted information
print(ownership_data)


In [5]:
response = requests.get('https://www.sec.gov/Archives/edgar/data/350698/000095017024004354/0000950170-24-004354.txt')

In [6]:
print(response)

<Response [403]>


In [34]:
import re
# Find the table with <th> headers
table_with_headers = None
for table in tables:
    if table.find('th'):
        table_with_headers = table
        break

if table_with_headers:
    headers = [th.text.strip() for th in table_with_headers.find_all('th')]

    # Initialize a list to store your table data, starting with headers
    table_data = [headers]

    # Iterate over the rows in the table
    for row in table_with_headers.find_all('tr'):
        row_data = []
        for cell in row.find_all('td'):
            link = cell.find('a', href= re.compile(r'^/Archives.*.txt'))
            if link:
                row_data.append(link['href'])
            else:
                row_data.append(cell.text.strip())
        #row_data = [cell.text.strip() for cell in row.find_all('td')]
        if row_data:  # This check avoids adding an empty list for rows without <td>
            table_data.append(row_data)

    # Now table_data contains the data from the table, with headers as the first row
    for row in table_data:
        print(row)
else:
    print("No table with header rows found.")

['Form', 'Formats', 'Description', 'Accepted', 'Filing Date', 'File/Film No']
['COMMODORE CAPITAL LP (0001831942) (Reporting)', 'COMMODORE CAPITAL LP (0001831942) (Reporting)', 'COMMODORE CAPITAL LP (0001831942) (Reporting)']
['4', '/Archives/edgar/data/1831942/000149315224002290/0001493152-24-002290.txt', 'Statement of changes in beneficial ownership of securitiesAccession Number: 0001493152-24-002290 \xa0Act: 34 \xa0Size:\xa05 KB', '2024-01-1221:55:09', '2024-01-12', '001-40407\n24533451']
['Vera Therapeutics, Inc. (0001831828) (Issuer)', 'Vera Therapeutics, Inc. (0001831828) (Issuer)', 'Vera Therapeutics, Inc. (0001831828) (Issuer)']
['4', '/Archives/edgar/data/1831828/000149315224002290/0001493152-24-002290.txt', 'Statement of changes in beneficial ownership of securitiesAccession Number: 0001493152-24-002290 \xa0Size:\xa05 KB', '2024-01-1221:55:09', '2024-01-12']
['Ray-Chaudhuri Avijit K. (0002008129) (Reporting)', 'Ray-Chaudhuri Avijit K. (0002008129) (Reporting)', 'Ray-Chaudhuri

In [None]:
import re

text = "some text<XML>inside</XML>some text"
split_text = re.split(r'</?XML>', text)
print(split_text)

'<XML>'

