In [9]:
import os
import re
import smtplib, ssl
import sys
from datetime import date,datetime
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import nums_from_string
import pandas as pd
from bs4 import BeautifulSoup
from lxml import html
from requests_html import HTMLSession

## Helper Methods

In [10]:
def fetchSession(url):
    """Issue a GET request for ``url`` through a fresh ``HTMLSession``.

    Returns the response object produced by ``HTMLSession.get``.
    """
    return HTMLSession().get(url)

def getTrades(r):
    """Return every ``<tr>`` after the first from the first table in ``r.html``.

    The first row is dropped (presumably the header row).

    Raises:
        IndexError: if ``r.html`` contains no ``<table>`` element.
    """
    return r.html.find('table')[0].find('tr')[1:]

def arr_to_dict(lst):
    """Turn a flat ``[key, value, key, value, ...]`` sequence into a dict.

    Items are consumed two at a time; a trailing item without a partner is
    dropped.
    """
    pairs = iter(lst)
    # zipping one iterator with itself yields consecutive (key, value) pairs
    return {key: val for key, val in zip(pairs, pairs)}

def value_to_ints(value):
    """Parse a dollar-range string such as ``'$1,001 - $15,000'`` into ints.

    The amounts are located with a regular expression instead of splitting on
    an exact run of whitespace, so the result does not depend on how the range
    separator is spaced (``'$1,001-$15,000'`` parses the same way).

    Args:
        value: text containing one or two dollar amounts.

    Returns:
        ``[low, high]``. When the text holds a single amount
        (e.g. ``'Over $50,000,000'``) both entries are that amount.

    Raises:
        ValueError: if no digits are found in ``value``.
    """
    # drop thousands separators and currency signs so each amount is one digit run
    cleaned = value.replace(',', '').replace('$', '')
    amounts = [int(digits) for digits in re.findall(r'\d+', cleaned)]
    if not amounts:
        raise ValueError('no dollar amount found in %r' % (value,))
    low = amounts[0]
    high = amounts[1] if len(amounts) > 1 else low
    return [low, high]

In [11]:
def scrapeAllTradesToday():
    """Scrape the Senate disclosure table and return the trades filed today.

    Each trade occupies two consecutive table rows: the first holds the
    file/trade dates, the trade description and the senator; the second holds
    the trade type and the value range.

    Scanning stops at the first trade whose file date is not today's date.
    NOTE(review): this assumes the site lists the newest filings first and
    prints file dates as ``YYYY-MM-DD`` -- confirm against the live page.

    Returns:
        A list of dicts with the keys ``'trade date'``, ``'file date'``,
        ``'trade'``, ``'senator'``, ``'trade type'`` and ``'value'``
        (``value`` is the ``[low, high]`` list from ``value_to_ints``).

    Exits the process with status 1 if no table is found on the page.
    """
    r = fetchSession('https://sec.report/Senate-Stock-Disclosures')
    # getTrades indexes the first <table>; IndexError means the page had none
    try:
        trades = getTrades(r)
    except IndexError:
        print('website may be down. quitting.')
        sys.exit(1)

    # date.today() stringifies as 'YYYY-MM-DD'; datetime.today() would include
    # the time of day and therefore never match a date-only file date
    today = str(date.today())
    all_trades = []
    # step over row pairs; stopping at len - 1 skips a dangling unpaired row
    for i in range(0, len(trades) - 1, 2):
        l1_elements = trades[i].find('td')
        l2_elements = trades[i+1].find('td')[:-1]
        file_date, trade_date = l1_elements[0].text.split('\n')
        if file_date != today:
            break
        all_trades.append({
            'trade date': trade_date,
            'file date': file_date,
            'trade': l1_elements[1].text,
            'senator': l1_elements[2].text,
            # keep only the first line of the trade-type cell
            'trade type': l2_elements[0].text.split('\n', 1)[0],
            'value': value_to_ints(l2_elements[1].text),
        })
    return all_trades

# testing use
def scrapeAllTradesOnSite():
    """Scrape every trade currently shown in the Senate disclosure table.

    Trades span two consecutive rows: dates, description and senator on the
    first; trade type and value range on the second.

    Returns:
        A list of dicts keyed by ``'trade date'``, ``'file date'``,
        ``'trade'``, ``'senator'``, ``'trade type'`` and ``'value'``.

    Exits the process with status 1 if no table is found on the page.
    """
    response = fetchSession('https://sec.report/Senate-Stock-Disclosures')
    # no table on the page surfaces as IndexError from getTrades
    try:
        rows = getTrades(response)
    except IndexError:
        print('website may be down. quitting.')
        sys.exit(1)

    scraped = []
    for top in range(0, len(rows), 2):
        summary_cells = rows[top].find('td')
        # the last cell of the second row is not part of the record
        detail_cells = rows[top + 1].find('td')[:-1]
        filed, traded = summary_cells[0].text.split('\n')
        record = {
            'trade date': traded,
            'file date': filed,
            'trade': summary_cells[1].text,
            'senator': summary_cells[2].text,
        }
        # only the first line of the trade-type cell is kept
        record['trade type'] = detail_cells[0].text.split('\n', 1)[0]
        record['value'] = value_to_ints(detail_cells[1].text)
        scraped.append(record)
    return scraped


In [16]:
def getHTML(url):
    """Fetch ``url`` and return its body parsed by BeautifulSoup's ``html.parser``."""
    return BeautifulSoup(fetchSession(url).text, 'html.parser')

def getTicker(trade_):
    """Extract the text inside the first ``[...]`` pair of a trade description.

    Args:
        trade_: description such as ``'Humacyte, Inc. - Common Stock [HUMA]'``.

    Returns:
        The bracketed text (``'HUMA'``), or ``''`` when the description
        contains no bracket pair.
    """
    # raw string: '\[' in a normal string literal is an invalid escape sequence
    found = re.search(r'\[(.*?)\]', trade_)
    return found.group(1) if found else ''

In [24]:
def getFirstRowEntry(ticker):
    """Return the HTML of one summary cell from the Yahoo Finance quote page.

    The cell is the second ``<td>`` of the second table inside the element
    with id ``quote-summary`` (presumably the value cell of the first row of
    the right-hand table, i.e. market cap or net assets -- verify against the
    live page).

    Args:
        ticker: symbol substituted into the quote URL.

    Returns:
        The cell as an HTML string, or ``''`` when the summary element, the
        second table or the second cell is missing.
    """
    url = 'https://finance.yahoo.com/quote/{}/'.format(ticker)
    soup = getHTML(url)
    quote_summary = soup.find(id='quote-summary')
    if quote_summary is None:
        return ''
    tables = quote_summary.find_all('table')
    # the right side table is index 1, so two tables must be present
    if len(tables) < 2:
        return ''
    mc_cells = tables[1].find_all('td')
    # index 1 is read below, so two cells must be present
    if len(mc_cells) < 2:
        return ''
    return str(mc_cells[1])

def isStock(row_one):
    """Classify a cell by the marker in its ``data-test="<MARKER>-value"`` attribute.

    Returns:
        ``1`` when the marker is ``MARKET_CAP``, ``0`` when it is
        ``NET_ASSETS``, and ``-1`` for any other marker or when the attribute
        is not found.
    """
    found = re.search('data-test="(.*)-value', row_one)
    if found is None:
        return -1
    codes = {'MARKET_CAP': 1, 'NET_ASSETS': 0}
    return codes.get(found.group(1), -1)

def parseToMillions(value_string):
    """Scale the first number found in ``value_string`` by its unit suffix.

    The result is kept in units of millions: a trailing ``'B'`` multiplies the
    number by 1000 and a trailing ``'T'`` by 1000000. Any other final
    character leaves the number unchanged.
    """
    suffix = value_string[-1:]
    amount = nums_from_string.get_nums(value_string)[0]
    scale = {'B': 1000, 'T': 1000000}
    if suffix in scale:
        amount = amount * scale[suffix]
    return amount

def getNAVCAP(row_one):
    """Read the figure between ``>`` and ``<`` in a cell's HTML, in millions.

    Args:
        row_one: HTML string of a single cell, e.g. the output of
            ``getFirstRowEntry``; may be ``''``.

    Returns:
        The value from ``parseToMillions`` rounded to two decimals, or ``-1``
        when the cell has no ``>...<`` content, the content is empty, or it
        reads ``'N/A'``.
    """
    found = re.search('>(.*)<', row_one)
    # an empty or tag-less cell yields no match; report it like 'N/A'
    if found is None:
        return -1
    value = found.group(1)
    if value == '' or value == 'N/A':
        return -1
    return round(parseToMillions(value),2)

def isSmallCapStock(row_one):
    """Return True when the cell is a market-cap cell with 0 < cap < 2000 (millions)."""
    # anything isStock does not classify as 1 is rejected without parsing the value
    if isStock(row_one) != 1:
        return False
    cap = getNAVCAP(row_one)
    return 0 < cap < 2000

In [14]:
def writeToFile(trades):
    """Overwrite ``data/daily_trades.txt`` with one ``key : value`` line per field.

    Each trade dict is followed by a blank line.
    """
    with open('data/daily_trades.txt', 'w') as out:
        for record in trades:
            out.writelines(
                '%s : %s\n' % (field, content)
                for field, content in record.items()
            )
            out.write('\n')

def sendEmail():
    """Email the contents of ``data/daily_trades.txt`` if the file is not empty.

    The message is sent over SMTP-over-SSL through ``smtp.gmail.com`` on port
    465, from the bot account to itself (the recipient is still the sender
    while testing).

    The account password is read from the ``MAILBOT_PASSWORD`` environment
    variable rather than being stored in the notebook.

    Raises:
        RuntimeError: if there is something to send but ``MAILBOT_PASSWORD``
            is unset or empty.
    """
    port = 465
    send_email = 'ders.mailbot@gmail.com'
    receive_email = 'andersseline15@gmail.com'  # intended recipient; not used yet

    with open('data/daily_trades.txt', 'r') as f:
        data = f.read()

    # an empty file means no (major) trade was recorded today
    if len(data) == 0:
        print('no major trades.')
        return

    print('major trade found.')
    # look the secret up only when a message will actually be sent
    password = os.environ.get('MAILBOT_PASSWORD')
    if not password:
        raise RuntimeError(
            'MAILBOT_PASSWORD environment variable is not set; cannot log in to SMTP.'
        )

    message = MIMEMultipart('alternative')
    message['Subject'] = 'Trade Alert'
    message['From'] = 'SenateStockWatch'
    message['To'] = send_email # change post testing
    message['Bcc'] = '' # for other recipients
    body = MIMEText(data, 'plain')
    message.attach(body)

    context = ssl.create_default_context()
    with smtplib.SMTP_SSL('smtp.gmail.com', port, context=context) as server:
        server.login(send_email, password)
        server.sendmail(
            send_email, send_email, message.as_string()
        )
        print('mail sent.')

## Testing

### Collect all large purchases and small-cap equity purchases from the trades list

In [None]:
# Filter every scraped trade down to the purchases worth flagging:
# any ticker purchase whose lower value bound is at least $50,000, plus any
# purchase of a small-cap stock regardless of size.
trades = scrapeAllTradesOnSite()
important_trades =[]
for t in trades:
    if t['trade type'] == 'Purchase':
        tick = getTicker(t['trade'])
        # if found a valid ticker of a purchased equity / etf
        if tick != '':
            # regardless of mkt cap or equity/etf, add any trades of volume over $50,000
            if t['value'][0] >= 50000:
                print(t)
                important_trades.append(t)
                continue # skip rest of the loop 
            row_one = getFirstRowEntry(tick)
            # isStock returns 1 / 0 / -1; -1 is truthy, so compare with 1
            # explicitly. This also skips the '' cell before it is parsed.
            if isStock(row_one) != 1:
                continue
            mktCap = getNAVCAP(row_one)
            valid_mktCap = mktCap < 2000 and mktCap > 0
            # if is small cap stock purchase, regardless of purchase size 
            if valid_mktCap:
                print(tick, mktCap)
                print(t)
                important_trades.append(t)

### Scrape trades with the market-cap and purchase-size filters applied in a single pass

In [65]:
def scrapeAllImportantTradesOnSite():
    """Scrape the disclosure table and keep only the notable stock purchases.

    A purchase with a ticker is kept when the Yahoo Finance cell identifies a
    stock (``isStock(...) == 1``) with a known market cap and one of these
    holds (market cap in millions, value = lower bound of the reported range):

    * small cap (cap <= 2000): any purchase size;
    * medium cap (2000 < cap <= 10000): value >= 15000;
    * large cap (cap > 10000): value >= 50000.

    Returns:
        A list of dicts with the keys ``'trade date'``, ``'file date'``,
        ``'senator'``, ``'trade'``, ``'trade type'``, ``'value'``,
        ``'mkt cap'`` (``'small'``/``'medium'``/``'large'``) and
        ``'yahoo finance'`` (the quote URL).

    Exits the process with status 1 if no table is found on the page.
    """
    r = fetchSession('https://sec.report/Senate-Stock-Disclosures')
    # getTrades indexes the first <table>; IndexError means the page had none
    try:
        trades = getTrades(r)
    except IndexError:
        print('website may be down. quitting.')
        sys.exit(1)

    all_trades = []

    # each trade spans two rows; stopping at len - 1 skips a dangling unpaired row
    for i in range(0, len(trades) - 1, 2):
        l1_elements = trades[i].find('td')
        l2_elements = trades[i+1].find('td')[:-1]

        # ensure trade is a purchase, otherwise continue to next trade
        trade_type = l2_elements[0].text.split('\n', 1)[0]
        if trade_type != 'Purchase':
            continue

        file_date, trade_date = l1_elements[0].text.split('\n')
        trade = l1_elements[1].text
        senator = l1_elements[2].text
        value = value_to_ints(l2_elements[1].text)

        ticker = getTicker(trade)
        if ticker == '':
            continue

        row_one = getFirstRowEntry(ticker)
        # isStock returns 1 / 0 / -1 and -1 is truthy, so test for 1 explicitly;
        # this also filters out the '' cell returned when the page lacks data
        if isStock(row_one) != 1:
            continue
        mkt_cap = getNAVCAP(row_one)
        # getNAVCAP reports -1 for 'N/A'
        if mkt_cap <= 0:
            continue

        # any small caps, medium purchase medium caps, large purchase large cap
        if mkt_cap <= 2000:
            cap_string = 'small'
        elif mkt_cap <= 10000 and value[0] >= 15000:
            cap_string = 'medium'
        elif mkt_cap > 10000 and value[0] >= 50000:
            cap_string = 'large'
        else:
            continue

        url = 'https://finance.yahoo.com/quote/{}/'.format(ticker)
        all_trades.append({
            'trade date' : trade_date,
            'file date' : file_date,
            'senator' : senator,
            'trade' : trade,
            'trade type' : trade_type,
            'value' : value,
            'mkt cap' : cap_string,
            'yahoo finance' : url
        })
    return all_trades

In [66]:
# Scrape the site once and keep the filtered purchases for the cells below.
important_trades = scrapeAllImportantTradesOnSite()

In [77]:
# Print each important trade as a numbered "key : value" listing.
for i, t in enumerate(important_trades, start=1):
    print('Trade %d:' % i)
    for key,value in t.items():
        print(key, ':', value)
    print('\n')

Trade 1:
trade date : 2022-03-30
file date : 2022-04-08
senator : Thomas H Tuberville [Tuberville, Tommy]
trade : Limestone Bancorp, Inc. - Common Stock [LMST]
trade type : Purchase
value : [1001, 15000]
mkt cap : small
yahoo finance : https://finance.yahoo.com/quote/LMST/


Trade 2:
trade date : 2022-03-30
file date : 2022-04-08
senator : Thomas H Tuberville [Tuberville, Tommy]
trade : First Guaranty Bancshares, Inc. - Common Stock [FGBI]
trade type : Purchase
value : [1001, 15000]
mkt cap : small
yahoo finance : https://finance.yahoo.com/quote/FGBI/


Trade 3:
trade date : 2022-03-30
file date : 2022-04-08
senator : Thomas H Tuberville [Tuberville, Tommy]
trade : Humacyte, Inc. - Common Stock [HUMA]
trade type : Purchase
value : [15001, 50000]
mkt cap : small
yahoo finance : https://finance.yahoo.com/quote/HUMA/


Trade 4:
trade date : 2022-03-21
file date : 2022-04-08
senator : Thomas H Tuberville [Tuberville, Tommy]
trade : ChannelAdvisor Corporation Common Stock [ECOM]
trade type 

In [None]:
# Write the filtered trades to data/daily_trades.txt, the file sendEmail() reads.
writeToFile(important_trades)