
# Capitol Trades Buy/Sell Data Scrape

In [13]:
import pandas as pd 
from requests_html import HTMLSession
import os
import requests
from lxml import html 

In [14]:
# Request page 1 of the trades listing, asking for 'num_poli' entries per page.
num_poli = 20
url = f'https://app.capitoltrades.com/trades?page=1&pageSize={num_poli}'
session = HTMLSession()
r = session.get(url)
# Echo the response object so the HTTP status is visible in the cell output.
print(r)

<Response [200]>


In [15]:
# Take the first <table> element found in the response.
table = r.html.find('table')[0]
# Skip the header row, then keep at most the next 'num_poli' rows.
rows = table.find('tr')[1:num_poli + 1]
# Merge the text of all kept rows into one newline-free block of text.
full_output = ''.join(row.text for row in rows).replace('\n', '')

In [16]:
# divide chunk of text in table into slices, each corresponding to a single trade
def makeSlices(fullString, endKey):
    """Split ``fullString`` into consecutive pieces that each end with ``endKey``.

    Every returned slice runs up to and including one occurrence of
    ``endKey``. Text left over after the last occurrence (or the whole string
    when ``endKey`` never occurs) is returned as a final slice, so no input
    is dropped and concatenating the slices reproduces ``fullString``.

    Args:
        fullString: The text to split.
        endKey: The non-empty marker that terminates each slice.

    Returns:
        A list of str slices in their original order; ``[]`` for empty input.

    Raises:
        ValueError: If ``endKey`` is empty while there is text to split.
    """
    if not fullString:
        return []
    if not endKey:
        # An empty marker matches at every position and can never advance.
        raise ValueError('endKey must be a non-empty string')

    slices = []
    start = 0
    total = len(fullString)
    while start < total:
        # Search from the current position instead of re-slicing the string,
        # which keeps the scan linear in the length of the input.
        hit = fullString.find(endKey, start)
        if hit == -1:
            # No further marker: keep the remainder as the last slice.
            slices.append(fullString[start:])
            break
        end = hit + len(endKey)
        slices.append(fullString[start:end])
        start = end
    return slices

# extract important trade information given one of the slices made above
def getTradeInformation(data):
    """Pull the politician, office, ticker, transaction and value out of one slice.

    ``data`` is the flattened text of a single trade in which every field
    follows its label ('Name', 'Owner', 'Ticker', 'Transaction', 'Shares',
    'Value Range', 'Url').

    Args:
        data: Text of one trade, as produced by ``makeSlices``.

    Returns:
        A dict with the str values 'name', 'pos' (office, starting with 'Rep'
        or 'Sen'), 'ticker', 'trans' and 'value'. When no office marker is
        present, 'pos' is '' and 'name' holds everything between the 'Name'
        and 'Owner' labels.
    """
    # Name and office share the span between the 'Name' and 'Owner' labels.
    name_at = data.find('Name')
    header_start = name_at + len('Name') if name_at != -1 else 0
    header_end = data.find('Owner', header_start)
    if header_end == -1:
        header_end = len(data)
    header = data[header_start:header_end]

    # Only look for the office marker inside that span, so a 'Rep' or 'Sen'
    # appearing elsewhere in the slice (e.g. in an asset name) cannot be
    # mistaken for it. The office text ends the span, so the LAST marker is
    # the office even if the politician's name also contains one.
    # NOTE(review): assumes the office text itself ('Rep (R - GA)' style)
    # holds no second 'Rep'/'Sen' - confirm against the live page.
    office_at = max(header.rfind('Rep'), header.rfind('Sen'))
    if office_at == -1:
        name, office = header, ''
    else:
        name, office = header[:office_at], header[office_at:]

    # The ticker ends at the FIRST 'Transaction' label, while the transaction
    # type starts after the LAST one (the label occurs more than once).
    ticker = data[data.find('Ticker') + len('Ticker'):data.find('Transaction')]
    transaction = data[
        data.rfind('Transaction') + len('Transaction'):data.rfind('Shares')
    ]
    value = data[
        data.find('Value Range') + len('Value Range'):data.find('Url')
    ]
    return {
        'name': name,
        'pos': office,
        'ticker': ticker,
        'trans': transaction,
        'value': value,
    }
    
# Cut the scraped text at every 'Url' label, then parse and echo each trade.
slices = makeSlices(full_output, 'Url')
tradeSlips = []
for trade in map(getTradeInformation, slices):
    print(trade)
    tradeSlips.append(trade)

{'name': 'Allen, Richard Wayne (Rick) ', 'pos': 'Rep (R - GA)', 'ticker': 'DIS', 'trans': 'Sell (Stock) *', 'value': '$1,001 - $15,000'}
{'name': 'Allen, Richard Wayne (Rick) ', 'pos': 'Rep (R - GA)', 'ticker': 'PSDTX', 'trans': 'Sell (Mutual Fund) *', 'value': '$50,001 - $100,000'}
{'name': 'Allen, Richard Wayne (Rick) ', 'pos': 'Rep (R - GA)', 'ticker': 'SBUX', 'trans': 'Sell (Stock) *', 'value': '$15,001 - $50,000'}
{'name': 'Kustoff, David Frank ', 'pos': 'Rep (R - TN)', 'ticker': 'AMD', 'trans': 'Buy (Stock)', 'value': '$1,001 - $15,000'}
{'name': 'Kustoff, David Frank ', 'pos': 'Rep (R - TN)', 'ticker': 'AMZN', 'trans': 'Buy (Stock)', 'value': '$15,001 - $50,000'}
{'name': 'Kustoff, David Frank ', 'pos': 'Rep (R - TN)', 'ticker': 'MSFT', 'trans': 'Buy (Stock)', 'value': '$1,001 - $15,000'}
{'name': 'Burgess, Michael Clifton ', 'pos': 'Rep (R - TX)', 'ticker': 'SYK', 'trans': 'Sell (Stock Options) *', 'value': '$1,001 - $15,000'}
{'name': 'Lowenthal, Alan Stuart ', 'pos': 'Rep (D 

In [17]:
# turn a value string, formatted $XX - $XX or $XX, into a single integer
def parseValue(valueStr):
    """Return the dollar amount of a value string as an int.

    For a range such as '$1,001 - $15,000' the upper bound (15000) is
    returned; for a single amount such as '$250' that amount is returned.
    Everything except digits ('$', ',', spaces) is ignored, and the dash may
    be written with or without surrounding spaces.

    Args:
        valueStr: The value text of one trade.

    Returns:
        The upper bound of the range, or the single amount, as an int.

    Raises:
        ValueError: If the relevant part of ``valueStr`` holds no digits.
    """
    # The upper bound is whatever follows the last '-'; when there is no
    # dash, rpartition hands back the whole string as that last part.
    _, _, amount = valueStr.rpartition('-')
    digits = ''.join(ch for ch in amount if ch.isdigit())
    if not digits:
        raise ValueError('no dollar amount found in %r' % (valueStr,))
    return int(digits)

# define significant buys as anything a senator buys, or any purchase whose
# value (upper bound of its range) is above minValue
def getSignificantBuys(tradeSlips, minValue=15000):
    """Return the trades that count as significant buys.

    A trade qualifies when its 'trans' text contains 'Buy' and either its
    'pos' text contains 'Sen' or ``parseValue`` of its 'value' text is
    strictly greater than ``minValue``.

    Args:
        tradeSlips: Iterable of trade dicts with 'trans', 'pos' and 'value'
            keys, as built by ``getTradeInformation``.
        minValue: Dollar threshold a non-senator buy must exceed.

    Returns:
        A list of the qualifying trade dicts, in their original order.
    """
    sigBuys = []
    for t in tradeSlips:
        if 'Buy' not in t['trans']:
            continue
        # Test the office first: a senator's buy qualifies whatever its
        # value, so an unparsable value string cannot break it.
        if 'Sen' in t['pos'] or parseValue(t['value']) > minValue:
            sigBuys.append(t)
    return sigBuys

# Filter the parsed trades down to the significant buys.
sigBuys = getSignificantBuys(tradeSlips)

In [18]:
# Show every significant buy on its own line.
for buy in sigBuys:
    print(buy)

{'name': 'Kustoff, David Frank ', 'pos': 'Rep (R - TN)', 'ticker': 'AMZN', 'trans': 'Buy (Stock)', 'value': '$15,001 - $50,000'}
{'name': 'Boozman, John Nichols ', 'pos': 'Sen (R - AR)', 'ticker': 'IEF', 'trans': 'Buy (ETF)', 'value': '$1,001 - $15,000'}
{'name': 'Boozman, John Nichols ', 'pos': 'Sen (R - AR)', 'ticker': 'NFRA', 'trans': 'Buy (ETF)', 'value': '$1,001 - $15,000'}
{'name': 'Boozman, John Nichols ', 'pos': 'Sen (R - AR)', 'ticker': 'SLV', 'trans': 'Buy (ETF)', 'value': '$1,001 - $15,000'}
{'name': 'Boozman, John Nichols ', 'pos': 'Sen (R - AR)', 'ticker': 'TPYP', 'trans': 'Buy (ETF)', 'value': '$1,001 - $15,000'}
