
# Capitol Trades Buy/Sell Data Scrape

In [1]:
import pandas as pd 
from requests_html import HTMLSession
import os
import requests
from lxml import html 
import csv

In [2]:
# num_poli is sent as the pageSize query parameter and reused later to bound
# how many table rows are read
num_poli = 100
session = HTMLSession()
url = f'https://app.capitoltrades.com/trades?page=1&pageSize={num_poli}'
r = session.get(url)
# show the response object so the HTTP status is visible in the cell output
print(r)

<Response [200]>


In [3]:
# take the first <table> in the response; row 0 is the header, so keep
# rows 1..num_poli
table = r.html.find('table')[0]
rows = table.find('tr')[1:num_poli + 1]
# glue the text of every row together and strip newlines so the whole table
# becomes one continuous string
full_output = ''.join(i.text for i in rows).replace('\n', '')

In [5]:
# divide chunk of text in table into slices, each corresponding to a single trade
def makeSlices(fullString, endKey):
    """Split ``fullString`` into consecutive chunks that each end with ``endKey``.

    Every returned slice runs from the end of the previous slice up to and
    including the next occurrence of ``endKey``. Text left over after the last
    occurrence (or the whole string, when ``endKey`` never occurs) is returned
    as one final slice instead of being chopped into fixed-size fragments.

    Args:
        fullString: text to divide.
        endKey: non-empty marker that terminates each slice.

    Returns:
        list of str: the slices in order; ``[]`` for an empty ``fullString``.

    Raises:
        ValueError: if ``endKey`` is empty (it could never advance the scan).
    """
    if not endKey:
        raise ValueError('endKey must be a non-empty string')

    slices = []
    start = 0
    while start < len(fullString):
        hit = fullString.find(endKey, start)
        if hit == -1:
            # str.find signals "not found" with -1; keep the remainder whole
            # rather than deriving a bogus slice end from that sentinel
            slices.append(fullString[start:])
            break
        end = hit + len(endKey)
        slices.append(fullString[start:end])
        # advance an offset instead of re-copying the tail of the string
        start = end
    return slices

# extract important trade information given one of the slices made above 
def getTradeInformation(data):
    """Extract the fields of one trade from a slice of flattened table text.

    Fields are located by plain substring search for the marker words
    'Name', 'Rep'/'Sen', 'Owner', 'Ticker', 'Transaction', 'Shares',
    'Value Range' and 'Url'. The office title is 'Rep' if that substring
    occurs anywhere in ``data``, otherwise 'Sen'. When neither occurs the
    office is returned as '' and the name runs up to 'Owner' (previously this
    case raised UnboundLocalError).

    NOTE(review): markers are looked up with str.find/rfind, which return -1
    when a marker is absent; slices built from a missing marker are therefore
    not meaningful. Callers should pass slices that contain every marker.

    Args:
        data: str, one trade's worth of text.

    Returns:
        dict with keys 'name', 'pos', 'ticker', 'trans', 'value' (all str).
    """
    nameStart = data.find('Name') + len('Name')
    ownerAt = data.find('Owner')

    # 'Rep' is tested before 'Sen' to keep the original precedence
    title = next((t for t in ('Rep', 'Sen') if t in data), None)
    if title is None:
        # no office marker: do not leave name/office unbound
        name = data[nameStart:ownerAt]
        office = ''
    else:
        titleAt = data.find(title)
        name = data[nameStart:titleAt]
        office = data[titleAt:ownerAt]

    ticker = data[
        data.find('Ticker') + len('Ticker'):
        data.find('Transaction')
    ]
    # rfind: use the LAST 'Transaction'/'Shares' occurrence in the slice
    transaction = data[
        data.rfind('Transaction') + len('Transaction'):
        data.rfind('Shares')
    ]
    value = data[
        data.find('Value Range') + len('Value Range'):
        data.find('Url')
    ]
    return {
        'name': name,
        'pos': office,
        'ticker': ticker,
        'trans': transaction,
        'value': value,
    }
    
#slices = makeSlices(full_output, 'Url')
#tradeSlips = []
#for s in slices:
#    trade = getTradeInformation(s)
#    tradeSlips.append(trade)
#    print(trade)

In [6]:
# dump the flattened, newline-free table text so its layout can be inspected
print(full_output)

Pe
te
r 
Se
ss
io
ns
Re
pu
bl
ic
an
Ho
us
eT
XN
ot
Di
sc
lo
se
dt
od
ay
20
21
-1
2-
01
83
da
ys
N/
AV
IR
GI
NI
A 
TO
BA
CC
O 
SE
TT
LE
ME
NT
 F
IN
AN
CI
NG
 C
OR
PO
RA
TI
ON
se
ll
pa
rt
ia
l1
00
K 
- 
25
0K
N/
AV
ie
wP
et
er
 S
es
si
on
sR
ep
ub
li
ca
nH
ou
se
TX
No
tD
is
cl
os
ed
to
da
y2
02
1-
12
-0
18
3d
ay
sX
:U
SU
ni
te
d 
St
at
es
 S
te
el
 C
or
ps
el
l1
K 
- 
15
KN
/A
Vi
ew
Pe
te
r 
Se
ss
io
ns
Re
pu
bl
ic
an
Ho
us
eT
XN
ot
Di
sc
lo
se
d2
 d
ay
s 
ag
o2
02
2-
02
-1
80
da
ys
MS
FT
:U
SM
ic
ro
so
ft
 C
or
pb
uy
1K
 -
 1
5K
28
7.
93
Vi
ew
Pe
te
r 
Se
ss
io
ns
Re
pu
bl
ic
an
Ho
us
eT
XN
ot
Di
sc
lo
se
d2
 d
ay
s 
ag
o2
02
2-
02
-1
80
da
ys
AA
PL
:U
SA
pp
le
 I
nc
bu
y1
K 
- 
15
K1
67
.3
Vi
ew
De
bo
ra
h 
Ro
ss
De
mo
cr
at
Ho
us
eN
CJ
oi
nt
2 
da
ys
 a
go
20
22
-0
1-
27
23
da
ys
MS
FT
:U
SM
ic
ro
so
ft
 C
or
ps
el
lp
ar
ti
al
1K
 -
 1
5K
29
9.
84
Vi
ew
De
bo
ra
h 
Ro
ss
De
mo
cr
at
Ho
us
eN
CJ
oi
nt
2 
da
ys
 a
go
20
22
-0
1-
21
29
da
ys
MS
FT
:U
SM
ic
ro
so
ft
 C
or
ps
el
lp
ar
ti
a

In [None]:
# multipliers for abbreviated amounts such as "15K" or "1M"
_VALUE_SUFFIXES = {'K': 1000, 'M': 1000000, 'B': 1000000000}

# turn a value string ("$XX - $XX", "1K - 15K" or a single "$XX") into one
# integer: the upper bound of the range, or the lone amount
def parseValue(valueStr):
    """Return the upper bound of a value range (or the single amount) as int.

    The text after the last '-' is taken as the upper bound; when there is no
    '-', the whole string is the amount. All digit characters of that part are
    joined into one integer (so "$15,000" -> 15000). If the character directly
    after the last digit is K, M or B (any case), the amount is multiplied by
    a thousand, a million or a billion respectively, so "1K - 15K" -> 15000.

    Args:
        valueStr: str such as "$1,001 - $15,000", "100K-250K" or "$50,000".

    Returns:
        int: the parsed upper bound / amount.

    Raises:
        ValueError: if the relevant part of ``valueStr`` contains no digits.
    """
    # rsplit tolerates ranges written with or without spaces around the dash
    upperText = valueStr.rsplit('-', 1)[-1]
    digitPositions = [i for i, ch in enumerate(upperText) if ch.isdigit()]
    if not digitPositions:
        raise ValueError(
            'no numeric amount found in value string: %r' % (valueStr,)
        )
    amount = int(''.join(upperText[i] for i in digitPositions))
    # the (optional) magnitude letter sits immediately after the last digit
    lastDigit = digitPositions[-1]
    suffix = upperText[lastDigit + 1:lastDigit + 2].upper()
    return amount * _VALUE_SUFFIXES.get(suffix, 1)

# define significant buys as anything a senator buys, or any purchase value of $15,000
def getSignificantBuys(tradeSlips, threshold=15000):
    """Filter trades down to the buys considered significant.

    A trade is kept when its 'trans' text contains "buy" (case-insensitive)
    and either its 'pos' text contains 'Sen' or the integer returned by
    ``parseValue`` for its 'value' text is strictly greater than ``threshold``.

    Args:
        tradeSlips: iterable of dicts with at least the keys 'trans', 'pos'
            and 'value' (all str).
        threshold: value a non-senator buy must exceed; defaults to 15000.

    Returns:
        list: the matching trade dicts, in their original order.
    """
    sigBuys = []
    for t in tradeSlips:
        # compare case-insensitively: the transaction text may be "buy" or "Buy"
        if 'buy' not in t['trans'].lower():
            continue
        # senator check first so it short-circuits: a senator's buy is kept
        # without needing its value string to be parseable
        if 'Sen' in t['pos'] or parseValue(t['value']) > threshold:
            sigBuys.append(t)
    return sigBuys

# NOTE(review): no active code visible in this notebook assigns tradeSlips (the
# loop that builds it is commented out), so on a fresh kernel this line would
# raise NameError -- TODO restore the code that builds tradeSlips
sigBuys = getSignificantBuys(tradeSlips)

In [None]:
# open file which stores significant buy data, clear it, and write with new data if any 
# make sure the output folder exists so open() does not fail when it is missing
os.makedirs('data', exist_ok=True)
# newline='' lets the csv module control line endings itself (otherwise extra
# blank rows can appear on some platforms); mode 'w' already empties the file,
# so no explicit truncate is needed
with open('data/buy_data.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(
        csvfile,
        fieldnames=['name', 'pos', 'ticker', 'trans', 'value'],
    )
    # the header is always written, even when sigBuys is empty
    writer.writeheader()
    writer.writerows(sigBuys)

In [None]:
# read back the CSV written above and preview it
sig_buys = pd.read_csv('data/buy_data.csv')
if len(sig_buys) > 0:
    # Jupyter only auto-renders a bare expression when it is the last
    # top-level statement of the cell; nested in an if-block it shows nothing,
    # so print the preview explicitly
    print(sig_buys.head())
else:
    print('No Significant Buys.')

No Significant Buys.
