In [114]:
from bs4 import BeautifulSoup, SoupStrainer, BeautifulStoneSoup
import datetime
import unicodedata
import requests
import pandas as pd
import quandl
import config
import dateutil.relativedelta
import re
import numpy as np

In [127]:
# S&P 500 index data (Yahoo Finance GSPC export), indexed by the parsed "Date" column
gspc_df = pd.read_csv(
    "Data/gspc.csv",
    index_col="Date",
    parse_dates=["Date"],
)

def get_index_price(input_date, market_open, index_df=None):
    """Look up the index price for the calendar day of ``input_date``.

    Parameters
    ----------
    input_date : datetime.datetime
        Moment of interest; only its ``.date()`` part is used for the lookup.
    market_open : bool
        Truthy selects the "Open" column, falsy selects "Adj Close".
    index_df : pandas.DataFrame, optional
        Price table indexed by date with "Open" and "Adj Close" columns.
        Defaults to the module-level ``gspc_df``.

    Returns
    -------
    pandas.Series
        The rows of the chosen column whose index equals the requested day.
        The Series is empty when that day is not present in the table.
    """
    if index_df is None:
        # Fall back to the notebook-wide S&P 500 table.
        index_df = gspc_df
    column = "Open" if market_open else "Adj Close"
    same_day = index_df.index == np.datetime64(input_date.date())
    return index_df.loc[same_day, column]

In [4]:
def get_sec_docs(cik="0001065088", headers=None, timeout=30):
    """Return a DataFrame of 8-K filing text links for a company.

    Queries the EDGAR company browser for 8-K filings of ``cik`` (XML
    output), keeps every filing link that ends in ``.htm`` and rewrites it
    from the ``...-index.htm`` page to the matching ``.txt`` document.

    Parameters
    ----------
    cik : str
        Central Index Key of the company to look up.
    headers : dict, optional
        Extra HTTP headers for the request.
        NOTE(review): SEC's fair-access policy reportedly expects a
        descriptive User-Agent -- confirm and pass one here if requests
        are rejected.
    timeout : float
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        Columns ``cik``, ``txt_link`` and ``doc_name``, one row per filing.

    Raises
    ------
    requests.HTTPError
        If EDGAR answers with an error status.
    """
    base_url = "https://www.sec.gov/cgi-bin/browse-edgar"
    # Further EDGAR query options (e.g. "dateb", "count", "owner") can be
    # added to this payload if the result set needs narrowing.
    payload = {
        "action": "getcompany",
        "CIK": cik,
        "type": "8-K",
        "output": "xml",
    }
    sec_response = requests.get(
        url=base_url, params=payload, headers=headers, timeout=timeout
    )
    # Fail loudly instead of parsing an error page into an empty frame.
    sec_response.raise_for_status()
    soup = BeautifulSoup(sec_response.text, 'lxml')

    doc_list = []
    doc_name_list = []
    for tag in soup.find_all('filinghref'):
        link = tag.string
        # Skip empty tags and anything that is not an ".htm" index page.
        if not link or link.rsplit(".", 1)[-1] != 'htm':
            continue
        # "...-index.htm" -> "...-index.html" -> "....txt"
        txt_doc = (link + "l").replace("-index.html", ".txt")
        doc_list.append(txt_doc)
        doc_name_list.append(txt_doc.split("/")[-1])

    return pd.DataFrame(
        {
            "cik": [cik] * len(doc_list),
            "txt_link": doc_list,
            "doc_name": doc_name_list,
        }
    )

In [36]:
def extract_text(link, timeout=30):
    """Download a filing and return its cleaned text and acceptance time.

    Parameters
    ----------
    link : str
        URL of the filing's ``.txt`` document.
    timeout : float
        Seconds to wait for the server before giving up.

    Returns
    -------
    tuple
        ``(text, submission_dt)``: ``text`` is the concatenation of every
        ``<html>`` section with tables removed and tabs/newlines flattened
        to spaces (an empty string when the filing has no such section);
        ``submission_dt`` is the ``datetime.datetime`` parsed from the first
        14 characters of the ``<acceptance-datetime>`` header.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the filing has no ``<acceptance-datetime>`` header or it cannot
        be parsed as ``YYYYMMDDHHMMSS``.
    """
    r = requests.get(link, timeout=timeout)
    r.raise_for_status()
    # Parse 8-K document
    filing = BeautifulSoup(r.content, "html.parser", from_encoding="ascii")

    # Extract submission datetime
    stamp_tag = filing.find("acceptance-datetime")
    if stamp_tag is None:
        raise ValueError("no acceptance-datetime header found in %s" % link)
    stamp = stamp_tag.get_text().strip()[:14]
    submission_dt = datetime.datetime.strptime(stamp, "%Y%m%d%H%M%S")

    # Extract HTML sections, keeping every section rather than only the last
    cleaned_sections = []
    for section in filing.find_all("html"):
        # Remove tables
        for table in section("table"):
            table.decompose()
        # Normalize unicode compatibility characters (e.g. non-breaking spaces)
        text = unicodedata.normalize("NFKD", section.text)
        # NOTE(review): "/s" is replaced literally, which also strips the
        # "/s/" signature marker; possibly "\s" was intended -- confirm.
        text = text.replace("\t", " ").replace("\n", " ").replace("/s", " ")
        cleaned_sections.append(text)

    return "".join(cleaned_sections), submission_dt

In [130]:
# Authenticate with Quandl: the key is read from the imported `config` module rather than written into this notebook
quandl.ApiConfig.api_key = config.api_key # supply your own key as `api_key` in `config`

def get_quandl_data(ticker, end_date, market_open):
    """Fetch a single price for ``ticker`` on ``end_date`` from Quandl WIKI.

    Parameters
    ----------
    ticker : str
        Symbol appended to the ``WIKI/`` dataset code.
    end_date : datetime.datetime
        Day to query; used as both start and end of the request.
    market_open : bool
        Truthy requests column 8, falsy requests column 11 of the dataset
        (presumably adjusted open / adjusted close in the WIKI column
        layout -- confirm against the Quandl dataset documentation).

    Returns
    -------
    The first value of the returned table (a numeric scalar).

    Raises
    ------
    IndexError
        If Quandl returns no rows for that day (e.g. a non-trading day).
    """
    column = "8" if market_open else "11"
    quandl_param = "WIKI/" + ticker + "." + column

    day = end_date.strftime("%Y-%m-%d")
    values = quandl.get(quandl_param, start_date=day, end_date=day).values
    if values.size == 0:
        # Same exception type as indexing the empty result, but with context.
        raise IndexError(
            "no %s data returned for %s (non-trading day?)" % (quandl_param, day)
        )
    return values[0, 0]

def _next_weekday(date):
    """Roll a Saturday or Sunday forward to the following Monday."""
    while date.isoweekday() > 5:
        date = date + datetime.timedelta(days=1)
    return date


def _first_value(value):
    """Collapse a scalar or a one-element Series/array to a plain scalar."""
    return np.asarray(value).ravel()[0]


def get_movement(ticker,start_date,release_date):
    """Measure the stock and index move around a filing's release time.

    The comparison window depends on when the report was released:

    * after 16:00 -- release-day close vs. the next weekday's open;
    * before 09:30 -- previous weekday's close vs. release-day open;
    * otherwise -- previous weekday's close vs. release-day close.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``get_quandl_data``.
    start_date
        Ignored: the window start is always derived from ``release_date``.
        Kept so existing call sites continue to work.
    release_date : datetime.datetime
        Moment the report was released.

    Returns
    -------
    tuple
        ``(price_pct_change, index_pct_change)`` as produced by
        ``calculate_pct_change``.

    Raises
    ------
    IndexError
        If no stock or index price exists for one of the two days
        (exchange holidays are not handled here).
    """
    market_close = release_date.replace(hour=16, minute=0, second=0, microsecond=0)
    market_open = release_date.replace(hour=9, minute=30, second=0, microsecond=0)
    one_day = datetime.timedelta(days=1)

    if release_date > market_close:
        # After hours: the reaction shows at the NEXT session's open, so a
        # weekend must roll forward to Monday, not back to Friday.
        start_date = release_date
        end_date = _next_weekday(release_date + one_day)
        after_uses_open = True
    elif release_date < market_open:
        # Pre-market: yesterday's close against today's open.
        start_date = weekday_check(release_date - one_day)
        end_date = release_date
        after_uses_open = True
    else:
        # During market hours: yesterday's close against today's close.
        start_date = weekday_check(release_date - one_day)
        end_date = release_date
        after_uses_open = False

    price_before_release = get_quandl_data(ticker, start_date, market_open=False)
    price_after_release = get_quandl_data(ticker, end_date, market_open=after_uses_open)

    # get_index_price returns date-indexed Series; subtracting two Series with
    # different dates aligns to NaN, so reduce each to a scalar first.
    index_before_release = _first_value(get_index_price(start_date, market_open=False))
    index_after_release = _first_value(get_index_price(end_date, market_open=after_uses_open))

    price_pct_change = calculate_pct_change(price_after_release, price_before_release)
    index_pct_change = calculate_pct_change(index_after_release, index_before_release)
    return price_pct_change, index_pct_change
def get_recent_movements(ticker,end_date):
    """Return the column-11 (close) WIKI price of ``ticker`` on ``end_date``.

    Despite its name this returns a single price, not a series of movements.
    It delegates to ``get_quandl_data`` with ``market_open=False`` so the two
    functions cannot drift apart.

    Parameters
    ----------
    ticker : str
        Symbol appended to the ``WIKI/`` dataset code.
    end_date : datetime.datetime
        Day to query.

    Raises
    ------
    IndexError
        If Quandl returns no rows for that day.
    """
    return get_quandl_data(ticker, end_date, market_open=False)

def calculate_pct_change(end_value,start_value):
    """Return the relative change from ``start_value`` to ``end_value``.

    The result is a fraction (0.1 means +10%), rounded to four decimals.
    """
    delta = end_value - start_value
    return round(delta / start_value, 4)

def weekday_check(date):
    """Move a weekend date back to the preceding Friday.

    Saturday is shifted by one day and Sunday by two; weekdays are returned
    unchanged.
    """
    # isoweekday(): Monday=1 ... Friday=5, Saturday=6, Sunday=7
    days_past_friday = date.isoweekday() - 5
    if days_past_friday > 0:
        date = date - datetime.timedelta(days=days_past_friday)
    return date

def get_closing_price(ticker="EBAY",release_date=None):
    """Return closing prices at fixed look-backs before ``release_date``.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``get_quandl_data``.
    release_date : datetime.datetime, optional
        Reference date. When omitted, the notebook-level ``dt`` is read at
        call time, so the function can be defined before ``dt`` exists.

    Returns
    -------
    dict
        Keys ``week_before``, ``month_before``, ``quarter_before`` and
        ``year_before``, each mapped to the closing price on that day.
        Weekend dates are moved to the preceding Friday first.

    Raises
    ------
    NameError
        If ``release_date`` is omitted and no global ``dt`` is defined.
    IndexError
        If Quandl has no price for one of the look-back days.
    """
    if release_date is None:
        try:
            release_date = dt
        except NameError:
            raise NameError(
                "release_date was not given and no global `dt` is defined; "
                "pass release_date explicitly"
            )

    # Offsets are added (negative deltas) exactly as calendar arithmetic:
    # relativedelta keeps month/year steps calendar-aware.
    lookbacks = [
        ("week_before", datetime.timedelta(weeks=-1)),
        ("month_before", dateutil.relativedelta.relativedelta(months=-1)),
        ("quarter_before", dateutil.relativedelta.relativedelta(months=-3)),
        ("year_before", dateutil.relativedelta.relativedelta(years=-1)),
    ]

    # The release-day close is not part of the result, so it is not fetched.
    previous_closes = dict()
    for label, offset in lookbacks:
        past_day = weekday_check(release_date + offset)
        previous_closes[label] = get_quandl_data(ticker, past_day, market_open=False)
    return previous_closes

In [7]:
# Build the table of 8-K text links using get_sec_docs' default CIK
ebay_df = get_sec_docs()

In [37]:
# Parse the filing at row label 1; `dt` (its acceptance datetime) is read as a global by get_closing_price
corpus, dt = extract_text(ebay_df['txt_link'][1])

In [82]:
# Closing prices one week / month / quarter / year before `dt`, for the default ticker
get_closing_price()

{'month_before': 37.74,
 'quarter_before': 37.64,
 'week_before': 39.82,
 'year_before': 31.83}