In [137]:
import os
import re
import smtplib
import ssl
import sys
from datetime import date, datetime
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from urllib.parse import quote

import nums_from_string
from bs4 import BeautifulSoup
from lxml import html
from requests_html import HTMLSession

## Helper Methods

In [138]:
def fetchSession(url, timeout=30):
    """GET ``url`` through a fresh ``HTMLSession`` and return the response.

    Args:
        url: address to request.
        timeout: seconds handed to the request as ``timeout=``. Without it a
            server that never answers would block the notebook indefinitely.
            Pass ``None`` to wait without limit (the previous behaviour).

    Returns:
        Whatever ``HTMLSession().get`` returns for ``url``.
    """
    session = HTMLSession()
    return session.get(url, timeout=timeout)

def getTrades(r):
    """Return every ``tr`` of the first ``table`` in ``r.html`` except the first.

    Raises IndexError when the page contains no table at all.
    """
    first_table = r.html.find('table')[0]
    # Index 0 is skipped (treated as the header row by the caller).
    return first_table.find('tr')[1:]

def value_to_ints(value):
    """Parse a disclosed dollar range such as ``'$1,001 - $15,000'``.

    The amounts are located with a regular expression rather than by
    splitting on a fixed run of spaces, so the separator between the two
    bounds (spaces, a bare dash, a newline, ...) no longer matters.

    Args:
        value: text of the value cell.

    Returns:
        ``[low, high]`` as ints. When the text holds a single amount
        (e.g. ``'Over $50,000,000'``) that amount is used for both bounds.
        When it holds more than two, the first two are returned.

    Raises:
        ValueError: if ``value`` contains no digits.
    """
    # Capture the whole-dollar part; a trailing ".cents" is consumed but dropped
    # so it cannot be mistaken for a second amount.
    amounts = [
        int(whole.replace(',', ''))
        for whole in re.findall(r'(\d[\d,]*)(?:\.\d+)?', value)
    ]
    if not amounts:
        raise ValueError('no dollar amount found in {!r}'.format(value))
    if len(amounts) == 1:
        return [amounts[0], amounts[0]]
    return amounts[:2]

In [139]:
def getHTML(url):
    """Download ``url`` and return its body parsed with BeautifulSoup's
    ``'html.parser'`` backend."""
    page_text = fetchSession(url).text
    return BeautifulSoup(page_text, 'html.parser')

def getTicker(trade_):
    """Return the text inside the first ``[...]`` of ``trade_``.

    Example: ``'Limestone Bancorp, Inc. - Common Stock [LMST]'`` -> ``'LMST'``.

    Returns:
        The bracketed text, or ``''`` when ``trade_`` has no bracket pair.
    """
    # Raw string: '\[' in a normal literal is an invalid escape sequence.
    found = re.search(r'\[(.*?)\]', trade_)
    if found is None:
        return ''
    return found.group(1)

In [140]:
def getFirstRowEntry(ticker):
    """Return the HTML of one summary cell from ``ticker``'s Yahoo Finance page.

    The page at ``https://finance.yahoo.com/quote/<ticker>/`` is fetched, the
    element with id ``quote-summary`` is located, and the second ``td`` of its
    second ``table`` is returned as a string.

    Returns:
        ``str`` of that ``td`` element, or ``''`` whenever the expected
        structure is missing (no summary element, fewer than two tables, or
        fewer than two cells in the second table).
    """
    url = 'https://finance.yahoo.com/quote/{}/'.format(ticker)
    soup = getHTML(url)
    quote_summary = soup.find(id='quote-summary')
    if quote_summary is None:
        return ''
    tables = quote_summary.find_all('table')
    # tables[1] is read below, so a single table is as unusable as none.
    if len(tables) < 2:
        return ''
    # right side table
    cells = tables[1].find_all('td')
    # cells[1] is read below; bail out instead of raising IndexError.
    if len(cells) < 2:
        return ''
    return str(cells[1])

def isStock(row_one):
    """Classify a summary cell by its ``data-test="<MARKER>-value"`` attribute.

    Returns:
        1 when the marker is ``MARKET_CAP``, 0 when it is ``NET_ASSETS``,
        and -1 for any other marker or when no marker is present.
    """
    found = re.search('data-test="(.*)-value', row_one)
    if found is None:
        return -1
    codes = {'MARKET_CAP': 1, 'NET_ASSETS': 0}
    # Anything unrecognised (e.g. N/A) falls back to -1.
    return codes.get(found.group(1), -1)

def parseToMillions(value_string):
    """Convert a suffixed amount string into a number expressed in millions.

    The first number found by ``nums_from_string.get_nums`` is scaled by the
    final character of ``value_string``: ``'B'`` multiplies by 1000 and ``'T'``
    by 1000000; any other final character leaves the number unchanged.
    """
    suffix = value_string[-1:]
    amount = nums_from_string.get_nums(value_string)[0]
    # Multipliers that bring billions / trillions down to millions.
    scale = {'B': 1000, 'T': 1000000}
    if suffix in scale:
        amount = amount * scale[suffix]
    return amount

def getNAVCAP(row_one):
    """Extract the amount shown in a summary cell, in millions, rounded to 2 dp.

    Args:
        row_one: HTML string of the cell (the text between the first ``>`` and
            the last ``<`` is taken as the displayed value).

    Returns:
        The parsed amount, or -1 when nothing usable is present: no
        ``>...<`` span at all (e.g. an empty string), an empty value,
        ``'N/A'``, or a value in which ``parseToMillions`` finds no number.
    """
    found = re.search('>(.*)<', row_one)
    # An empty / malformed cell has no match; treat it like N/A instead of
    # failing with AttributeError on None.
    if found is None:
        return -1
    value = found.group(1).strip()
    if value == '' or value == 'N/A':
        return -1
    try:
        return round(parseToMillions(value), 2)
    except IndexError:
        # parseToMillions indexes the first number it finds; none present.
        return -1

def isSmallCapStock(row_one):
    """Return True only when the cell is a stock (``isStock`` gives 1) whose
    cap from ``getNAVCAP`` is strictly between 0 and 2000 (millions)."""
    if isStock(row_one) != 1:
        return False
    cap = getNAVCAP(row_one)
    return cap < 2000 and cap > 0

In [166]:
def writeToFile(trades):
    """Overwrite ``data/daily_trades.txt`` with one paragraph per trade.

    Every entry of a trade dict becomes a ``key : value`` line, except the
    ``'Yahoo!'`` entry, which is written as the bare value. A blank line
    follows each trade.
    """
    with open('data/daily_trades.txt', 'w') as out:
        for record in trades:
            for label, content in record.items():
                if label != 'Yahoo!':
                    line = '%s : %s\n' % (label, content)
                else:
                    # The link is emitted on its own, without a label.
                    line = '%s\n' % content
                out.write(line)
            out.write('\n')

def sendEmail():
    """Email the contents of ``data/daily_trades.txt`` if the file is non-empty.

    An empty file means no trade was written today, in which case nothing is
    sent. Otherwise the text is mailed through Gmail's SSL SMTP endpoint.

    The account password is read from the ``MAILBOT_PASSWORD`` environment
    variable rather than being stored in the source. The value that used to be
    hardcoded here should be considered exposed and rotated.

    Raises:
        RuntimeError: there is mail to send but ``MAILBOT_PASSWORD`` is unset.
    """
    port = 465
    send_email = 'ders.mailbot@gmail.com'
    receive_email = 'andersseline15@gmail.com'

    with open('data/daily_trades.txt', 'r') as f:
        data = f.read()

    # if the length of the string from the file is 0, then no (major) trade
    # was executed today and there is nothing to send
    if len(data) == 0:
        print('no major trades.')
        return

    print('major trade found.')

    # Credentials are only needed once we know a message will be sent.
    password = os.environ.get('MAILBOT_PASSWORD')
    if not password:
        raise RuntimeError(
            'MAILBOT_PASSWORD environment variable is not set; cannot log in to SMTP.'
        )

    message = MIMEMultipart('alternative')
    message['Subject'] = 'Trade Alert'
    message['From'] = 'SenateStockWatch'
    message['To'] = send_email # change post testing
    message['Bcc'] = receive_email
    body = MIMEText(data, 'plain')
    message.attach(body)

    context = ssl.create_default_context()
    with smtplib.SMTP_SSL('smtp.gmail.com', port, context=context) as server:
        server.login(send_email, password)
        # NOTE(review): the envelope recipient is still only send_email, so
        # receive_email (the Bcc header) is not actually delivered to; this
        # matches the "change post testing" state above. Confirm before
        # switching recipients.
        server.sendmail(
            send_email, send_email, message.as_string()
        )
        print('mail sent.')

## Testing

### Scrape trades, applying the market-cap and purchase-size filters together

In [142]:
def _capCategory(mkt_cap, low_value):
    """Return 'small', 'medium' or 'large' if the purchase is worth reporting, else None.

    ``mkt_cap`` is in millions; ``low_value`` is the lower bound of the
    disclosed purchase range in dollars.
    """
    # any small caps, medium purchase medium caps, large purchase large cap
    if 0 < mkt_cap < 2000:
        return 'small'
    if 2000 <= mkt_cap <= 10000 and low_value >= 50000:
        return 'medium'
    if mkt_cap > 10000 and low_value >= 100000:
        return 'large'
    return None


def scrapeImportantTrades(today=None, onlyToday=False):
    """Scrape Senate stock disclosures and keep the noteworthy purchases.

    A trade is kept when it is a 'Purchase', has a ``[TICKER]`` in its
    description, the ticker resolves to a stock (``isStock(...) == 1``), and
    the market cap / purchase size pass ``_capCategory``.

    Args:
        today: date (or 'YYYY-MM-DD' string) to treat as "today". Defaults to
            the current date, evaluated on every call rather than once when
            the function is defined.
        onlyToday: when True, stop at the first purchase whose file date is
            not ``today``. This assumes the listing is ordered newest first
            (TODO confirm against the site).

    Returns:
        A list of dicts with keys 'trade date', 'file date', 'senator',
        'trade', 'trade type', 'value', 'mkt cap' and 'yahoo finance'.

    Exits the process (``sys.exit(1)``) if the page has no table.
    """
    if today is None:
        today = datetime.today().date()
    # The scraped file date is text, so compare against the ISO string form;
    # comparing a str with a date object is never equal.
    today_label = str(today)

    r = fetchSession('https://sec.report/Senate-Stock-Disclosures')
    # if website is down
    try:
        trades = getTrades(r)
    except IndexError:
        print('website may be down. quitting.')
        sys.exit(1)

    all_trades = []

    # Each trade spans two consecutive rows; stopping at len - 1 ignores a
    # dangling last row instead of raising IndexError on trades[i + 1].
    for i in range(0, len(trades) - 1, 2):
        l1_elements = trades[i].find('td')
        l2_elements = trades[i + 1].find('td')[:-1]

        # ensure trade is a purchase, otherwise continue to next trade
        trade_type = l2_elements[0].text.split('\n', 1)[0]
        if trade_type != 'Purchase':
            continue

        file_date, trade_date = l1_elements[0].text.split('\n')
        if onlyToday and file_date.strip() != today_label:
            break
        trade = l1_elements[1].text
        senator = l1_elements[2].text
        value = value_to_ints(l2_elements[1].text)

        ticker = getTicker(trade)
        if ticker == '':
            continue

        row_one = getFirstRowEntry(ticker)
        # '' means the quote page could not be parsed. isStock returns -1 for
        # unknown entries, which is truthy, so test for 1 explicitly.
        if not row_one or isStock(row_one) != 1:
            continue

        cap_string = _capCategory(getNAVCAP(row_one), value[0])
        if cap_string is None:
            continue

        all_trades.append({
            'trade date' : trade_date,
            'file date' : file_date,
            'senator' : senator,
            'trade' : trade,
            'trade type' : trade_type,
            'value' : value,
            'mkt cap' : cap_string,
            'yahoo finance' : 'https://finance.yahoo.com/quote/{}/'.format(ticker)
        })
    return all_trades

In [143]:
# Scrape every qualifying purchase currently listed (onlyToday is left at its
# default of False, so no file-date cut-off is applied).
important_trades_all = scrapeImportantTrades()

In [148]:
# Show the first two scraped trades, one "key : value" line per field.
for t in important_trades_all[:2]:
    for key, value in t.items():
        print(key, value, sep=' : ')
    print(end='\n\n')

trade date : 2022-03-30
file date : 2022-04-08
senator : Thomas H Tuberville [Tuberville, Tommy]
trade : Limestone Bancorp, Inc. - Common Stock [LMST]
trade type : Purchase
value : [1001, 15000]
mkt cap : small
yahoo finance : https://finance.yahoo.com/quote/LMST/


trade date : 2022-03-30
file date : 2022-04-08
senator : Thomas H Tuberville [Tuberville, Tommy]
trade : First Guaranty Bancshares, Inc. - Common Stock [FGBI]
trade type : Purchase
value : [1001, 15000]
mkt cap : small
yahoo finance : https://finance.yahoo.com/quote/FGBI/




In [149]:
# Restrict the scrape to disclosures filed today; an empty result list is
# reported with a message instead of being printed.
important_trades_today = scrapeImportantTrades(onlyToday=True)
print(important_trades_today or 'no important trades today.')

no important trades today.


## Email Appearance

In [175]:
def cleanText(trades_list):
    """Turn scraped trade dicts into display-ready dicts for the email body.

    Args:
        trades_list: dicts with the keys 'trade date' ('YYYY-MM-DD'),
            'file date', 'senator', 'trade', 'value' ([low, high]),
            'mkt cap' ('small' / 'medium' / anything else) and
            'yahoo finance'.

    Returns:
        A list of dicts with the keys 'Trade Date', 'File Date', 'Senator',
        'Equity', 'Trade Value', 'Market Cap' and 'Yahoo!'. 'Trade Date'
        carries the age of the trade, e.g. ``'2022-03-30 (22 days ago)'``.
    """
    today = datetime.today().date()
    cap_labels = {
        'small': 'Small Cap (Under $2B)',
        'medium': 'Medium Cap ($2B to $10B)',
    }

    trades_for_txt = []
    for t in trades_list:
        traded_on = datetime.strptime(str(t['trade date']), '%Y-%m-%d').date()
        # Use the day count directly: str(timedelta) has no "N days" part for
        # a zero-day difference, which used to render as "(0:00:00 ago)".
        days_ago = (today - traded_on).days
        unit = 'day' if abs(days_ago) == 1 else 'days'
        trade_date = '{} ({} {} ago)'.format(t['trade date'], days_ago, unit)

        value_string = '${:,} to ${:,}'.format(t['value'][0], t['value'][1])

        # Anything that is not small or medium is labelled large.
        mkt_cap_string = cap_labels.get(t['mkt cap'], 'Large Cap (Over $10B)')

        trades_for_txt.append(
            {
                'Trade Date' : trade_date,
                'File Date' : t['file date'],
                'Senator' : t['senator'],
                'Equity' : t['trade'],
                'Trade Value' : value_string,
                'Market Cap' : mkt_cap_string,
                'Yahoo!' : t['yahoo finance']
            }
        )
    return trades_for_txt

In [176]:
# Format all scraped trades, then preview the first one the way it will be
# written out: labelled lines, with the Yahoo! link printed bare.
cleaned_trades = cleanText(important_trades_all)
for key, value in cleaned_trades[0].items():
    if key != 'Yahoo!':
        print(key, value, sep=' : ')
    else:
        print(value)

Trade Date : 2022-03-30 (22 days ago)
File Date : 2022-04-08
Senator : Thomas H Tuberville [Tuberville, Tommy]
Equity : Limestone Bancorp, Inc. - Common Stock [LMST]
Trade Value : $1,001 to $15,000
Market Cap : Small Cap (Under $2B)
https://finance.yahoo.com/quote/LMST/


In [177]:
# Write the formatted trades to data/daily_trades.txt (the file sendEmail reads).
writeToFile(cleaned_trades)

In [178]:
# Mail the contents of data/daily_trades.txt; nothing is sent if the file is empty.
sendEmail()

major trade found.
mail sent.


## Getting Links to Google News Articles

In [194]:
# takes trade line and converts to url formatting
def cleanURLQuery(trade):
    """Percent-encode a trade description for use as a URL query value.

    Commas are dropped (as before); every other character outside the
    unreserved set is percent-encoded, so names containing ``&``, ``#``,
    ``+`` or ``/`` no longer corrupt the query string.

    Example: ``'Limestone Bancorp, Inc. - Common Stock [LMST]'`` ->
    ``'Limestone%20Bancorp%20Inc.%20-%20Common%20Stock%20%5BLMST%5D'``.
    """
    # safe='' so that '/' is encoded too; spaces become %20, not '+'.
    return quote(trade.replace(',', ''), safe='')

In [195]:
# Build one Google News search URL per important trade; the encoded query
# looks like ...Common%20Stock%20%5BLMST%5D...
urls = []
for t in important_trades_all:
    query = cleanURLQuery(t['trade'])
    urls.append(
        'https://news.google.com/search?q={}&hl=en-US&gl=US&ceid=US%3Aen'.format(query)
    )

In [218]:
# Fetch the search page for the first trade and display its <main> elements.
# (An earlier idea, not used: soup.find('div', id='jslog').)
url = urls[0]
soup = getHTML(url)
main = soup.find_all('main')
main

[<main class="HKt8rc CGNRMc" jslog="94187; 3:W251bGwsbnVsbCxudWxsLG51bGwsbnVsbCxudWxsLDE0MSxudWxsLG51bGwsbnVsbCwzN10=" jsname="fjw8sb"><c-wiz c-wiz="" class="FffXzd" data-n-ca-it="Limestone Bancorp Inc. - Common Stock [LMST]" data-n-et="406" data-n-ham="true" data-n-q="Limestone Bancorp Inc. - Common Stock [LMST]" data-node-index="0;0" data-p='%.@."Limestone Bancorp Inc. - Common Stock [LMST]",[["en-US","US",["SPORTS_FULL_COVERAGE","WEB_TEST_1_0_0"],null,[],1,1,"US:en"],"en-US","US",true,[2,4,28],1,true,"442274222",false,false],1]' jsaction="rcuQ6b:npT2md;" jscontroller="o2EnYc" jsdata="deferred-i2" jsmodel="hc6Ubd GITtrd hT8rr" jsrenderer="vBVNjc" view=""><div class="lBwEZb BL5WZb xP6mwf" data-n-et="200" data-n-ham="true" jscontroller="SpTAFc" jsdata="tbf4if;ui|124+97e5c89c-bca0-4f25-8cff-331887764f2d,ui|124+48ac1620-64d0-4573-b91b-9cdf41d52d30;3" jsmodel="dPwZPd hT8rr" jsname="esK7Lc"><div class="NiLAwe y6IFtc R7GTQ keNKEd j7vNaf" jslog="93789; 3:W251bGwsbnVsbCxudWxsLG51bGwsIiIsbnVsb