## Screen through all trades to find most important 

In [35]:
import pandas as pd 
from requests_html import HTMLSession
import os
import requests
from lxml import html 
import csv
from datetime import date,datetime
import re 

In [36]:
def fetchSession(url):
    session = HTMLSession()
    r = session.get(url)
    return r

def getTrades(r):
    table = r.html.find('table')[0]
    rows = table.find('tr')
    return rows[1:]

In [37]:
def arr_to_dict(lst):
    it = iter(lst)
    res_dict = dict(zip(it,it))
    return res_dict 
def value_to_ints(value):
    bad_chars = [
        ',','$','-'
    ]
    for c in bad_chars:
        value = value.replace(c,'')
    low, high = [
        int(x) for x in (value.split('  ', 1))
    ]
    return [low,high]

## Base list of trades

In [38]:
def scrapeAllTrades():
    r = fetchSession('https://sec.report/Senate-Stock-Disclosures')
    trades = getTrades(r)
    n = len(trades)
    all_trades = []
    l1_head = [
        'trade date', 'file date', 'trade', 'senator'
    ]
    l2_head = [
        'trade type', 'value'
    ]
    for i in range(0,n,2):
        trade = []
        l1_elements = trades[i].find('td')
        l2_elements = trades[i+1].find('td')[:-1]
        file_date, trade_date = l1_elements[0].text.split('\n')
        trade_snip = l1_elements[1].text
        senator = l1_elements[2].text
        l1_cleaned = [
            trade_date,file_date,trade_snip,senator
        ]
        for h,e in zip(l1_head, l1_cleaned):
            trade.append(h)
            trade.append(e)
        for h,e in zip(l2_head, l2_elements):
            trade.append(h)
            trade.append(e.text)
        trade[9] = trade[9].split('\n', 1)[0]
        trade[11] = value_to_ints(trade[11])
        trade = arr_to_dict(trade)
        all_trades.append(trade)
    return all_trades
all_trades = scrapeAllTrades()

In [39]:
all_trades[0:2]

[{'trade date': '2022-03-09',
  'file date': '2022-04-13',
  'trade': 'Del Taco Restaurants, Inc. - Common Stock [TACO]',
  'senator': 'William F Hagerty, IV [Hagerty, Bill]',
  'trade type': 'Sale (Partial)',
  'value': [15001, 50000]},
 {'trade date': '2022-03-29',
  'file date': '2022-04-08',
  'trade': 'Nov 22 CBT Soybeans',
  'senator': 'Thomas H Tuberville [Tuberville, Tommy]',
  'trade type': 'Purchase',
  'value': [1001, 15000]}]

## On certain day

In [40]:
today = '2022-04-13'
today_dt = datetime.strptime(
    today, '%Y-%m-%d'
).date()
def scrapeAllTradesDate(date):
    r = fetchSession('https://sec.report/Senate-Stock-Disclosures')
    trades = getTrades(r)
    n = len(trades)
    all_trades = []
    l1_head = [
        'trade date', 'file date', 'trade', 'senator'
    ]
    l2_head = [
        'trade type', 'value'
    ]
    current = True
    while current:
        for i in range(0,n,2):
            trade = []
            l1_elements = trades[i].find('td')
            l2_elements = trades[i+1].find('td')[:-1]
            file_date, trade_date = l1_elements[0].text.split('\n')
            trade_snip = l1_elements[1].text
            senator = l1_elements[2].text
            l1_cleaned = [
                trade_date,file_date,trade_snip,senator
            ]
            for h,e in zip(l1_head, l1_cleaned):
                trade.append(h)
                trade.append(e)
            for h,e in zip(l2_head, l2_elements):
                trade.append(h)
                trade.append(e.text)
            if trade[3] != date:
                print('no major trades')
                current = False
                break 
            trade[9] = trade[9].split('\n', 1)[0]
            trade[11] = value_to_ints(trade[11])
            trade = arr_to_dict(trade)
            all_trades.append(trade)
    return all_trades
all_trades_today = scrapeAllTradesDate(today)

no major trades


In [41]:
all_trades_today

[{'trade date': '2022-03-09',
  'file date': '2022-04-13',
  'trade': 'Del Taco Restaurants, Inc. - Common Stock [TACO]',
  'senator': 'William F Hagerty, IV [Hagerty, Bill]',
  'trade type': 'Sale (Partial)',
  'value': [15001, 50000]}]

## 1. Screen by value of purchase

In [42]:
def getLargePurchases(all_trades):
    large_trades = []
    for t in all_trades:
        if t['value'][1] > 50001 and t['trade type'] == 'Purchase':
            # clean up data for presenation
            # removed part to find how many days ago for simplicity for testing
            trade_date = t['trade date']
            value_string = '$' + (
                "{:,}".format(t['value'][0])
            ) + ' to $' + (
                "{:,}".format(t['value'][1])
            )
            large_trades.append(
                 {
                'Trade' : t['trade'],
                'Trade Type' : t['trade type'],
                'Value' : value_string,
                'Trade Date' : trade_date,
                'Senator' : t['senator']
                }
            )
    return large_trades

In [43]:
getLargePurchases(all_trades)[0:2]

[{'Trade': 'ChannelAdvisor Corporation Common Stock [ECOM]',
  'Trade Type': 'Purchase',
  'Value': '$50,001 to $100,000',
  'Trade Date': '2022-03-08',
  'Senator': 'Thomas H Tuberville [Tuberville, Tommy]'},
 {'Trade': 'Nashville Soccer Holdings, LLC Company: Nashville Soccer Holdings, LLC \xa0(Nashville, TN) Description:\xa0Holds sports and entertainment interests.',
  'Trade Type': 'Purchase',
  'Value': '$1,000,001 to $5,000,000',
  'Trade Date': '2022-02-28',
  'Senator': 'William F Hagerty, IV [Hagerty, Bill]'}]

## 2. Screen by large equity purchase

In [44]:
# input is the string result of t['trade'] = trade_
def isEquity(trade_):
    regex = re.findall('\[(.*?)\]', trade_)
    return (
        'Common Stock' in trade_ or len(regex) != 0
    )
def isPurchase(trade_type_):
    return trade_type_ == 'Purchase'
def isLarge(value_):
    return value_[1] > 50001

In [45]:
def getLargeEquity(all_trades):
    large_trades = []
    for t in all_trades:
        if isLarge(t['value']) and isPurchase(t['trade type']) and isEquity(t['trade']):
            # clean up data for presenation
            # removed part to find how many days ago for simplicity for testing
            trade_date = t['trade date']
            value_string = '$' + (
                "{:,}".format(t['value'][0])
            ) + ' to $' + (
                "{:,}".format(t['value'][1])
            )
            large_trades.append(
                 {
                'Trade' : t['trade'],
                'Trade Type' : t['trade type'],
                'Value' : value_string,
                'Trade Date' : trade_date,
                'Senator' : t['senator']
                }
            )
    return large_trades

## 3. Small Cap Equity Stocks

In [46]:
def getTicker(trade_):
    return re.findall('\[(.*?)\]', trade_)[0]

In [47]:
def list_tickers(equity_trades):
    tickers = []
    for e in equity_trades:
        tickers.append(
            getTicker(e['Trade'])
        )
    return tickers

In [48]:
tickers = list_tickers(getLargeEquity(all_trades))
for t in tickers:
    print(t)

ECOM
AAPL
X
PYPL
GOLD
BABA
BABA
BABA
CLF
SCCO
INTC
X


### Determines if a ticker is a small cap stock

In [133]:
from bs4 import BeautifulSoup

In [126]:
def getHTML(url):
    r = fetchSession(url)
    h = r.text
    doc = BeautifulSoup(h, 'html.parser')
    return doc

def parseToMillions(value_string):
    unit = value_string[-1:]
    number = float(value_string[:-1])
    #keep in units of millions
    if unit == 'B':
        number = number * 1000
    elif unit == 'T':
        number = number * 1000000
    return number

In [135]:
def getMktCap(ticker):
    url = 'https://finance.yahoo.com/quote/{}/'.format(ticker)
    soup = getHTML(url)
    quote_summary = soup.find(id='quote-summary')
    tables = quote_summary.find_all('table')
    mc_table = tables[1]
    mc_rows = mc_table.find_all('td')
    mc_string = str(mc_rows[1])
    value = re.search('>(.*)<', mc_string).group(1)
    return round(parseToMillions(value),2)

In [137]:
for t in tickers:
    print(t, getMktCap(t))

ECOM 465.91
AAPL 2734000.0
X 9805.0
PYPL 121451.0
GOLD 44359.0
BABA 259735.0
BABA 259796.0
BABA 272001.0
CLF 16013.0
SCCO 57672.0
INTC 191190.0
X 9963.0
