In [1]:
# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np
import urllib.request as ur
from bs4 import BeautifulSoup
import warnings

In [2]:
# Ticker symbol interpolated into the Yahoo Finance URLs built further down
ticker = 'TSLA'

In [3]:
# Begin yahoo_income_statement(ticker)
# =============================================================================

In [4]:
# ## Read the Yahoo Finance income statement page
income_url = f'https://finance.yahoo.com/quote/{ticker}/financials?p={ticker}'

# Open the response as a context manager so the underlying HTTP connection
# is closed once the body has been read, even if read() raises.
with ur.urlopen(income_url) as response:
    read_url = response.read()

# Parse the downloaded HTML using BeautifulSoup's lxml parser
income_soup = BeautifulSoup(read_url, 'lxml')

In [5]:
# ## Collect the text and title of every <div> on the income statement page
div_list = []

for div in income_soup.find_all('div'):
    text, title = div.string, div.get('title')

    # The div's string content is always recorded (it may be None)
    div_list.append(text)

    # The title attribute is recorded only when it differs from the content,
    # so a label that appears in both places is not stored twice
    if text != title:
        div_list.append(title)

In [6]:
# ## Filter out irrelevant data
# In a single pass keep only entries that are non-empty, are not one of the
# excluded labels ('Operating Expenses', 'Non-recurring Events', 'Expand All'),
# and are not inline script text beginning with '(function'.
div_list = [
    entry for entry in div_list
    if entry
    and entry not in ('Operating Expenses', 'Non-recurring Events', 'Expand All')
    and not entry.startswith('(function')
]

# Drop the first 13 and the last 5 surviving entries and put "Breakdown" in
# front so the list starts with the label of the header row.
# NOTE(review): the 13 / -5 offsets are presumably tied to the page layout at
# the time of writing -- confirm they still bracket the statement table.
income_list = ['Breakdown'] + div_list[13: -5]

In [7]:
# ## Build the income statement DataFrame

# Cut the flat list into consecutive 6-item tuples by zipping six strided
# slices; a trailing group with fewer than 6 items is dropped.
income_data = list(zip(*(income_list[offset::6] for offset in range(6))))

income_df = pd.DataFrame(income_data)

# Promote the first row to column labels and keep the remaining rows as data
headers = income_df.iloc[0]
income_df = income_df.iloc[1:]
income_df.columns = headers

# Use the row labels in the 'Breakdown' column as the index
income_df = income_df.set_index('Breakdown')

warnings.warn('Amounts are in thousands.')

print(income_df)

0                                                          ttm  12/31/2019  \
Breakdown                                                                    
Total Revenue                                       25,708,000  24,578,000   
Cost of Revenue                                     20,625,000  20,509,000   
Gross Profit                                         5,083,000   4,069,000   
Operating Expense                                    3,865,000   3,989,000   
Operating Income                                     1,218,000      80,000   
Net Non Operating Interest Income Expense             -651,000    -641,000   
Other Income Expense                                     3,000    -104,000   
Pretax Income                                          570,000    -665,000   
Tax Provision                                           91,000     110,000   
Net Income Common Stockholders                         368,000    -862,000   
Diluted NI Available to Com Stockholders               368,000  

  from ipykernel import kernelapp as app


In [8]:
# End yahoo_income_statement(ticker)
# =============================================================================

In [9]:
# Begin yahoo_balance_sheet(ticker):
# =============================================================================

In [10]:
# ## Read the Yahoo Finance balance sheet page
balancesheet_url = f'https://finance.yahoo.com/quote/{ticker}/balance-sheet?p={ticker}'

# Open the response as a context manager so the underlying HTTP connection
# is closed once the body has been read, even if read() raises.
with ur.urlopen(balancesheet_url) as response:
    read_url = response.read()

# Parse the downloaded HTML using BeautifulSoup's lxml parser
balancesheet_soup = BeautifulSoup(read_url, 'lxml')

In [11]:
# ## Collect the text and title of every <div> on the balance sheet page
div_list = []

for div in balancesheet_soup.find_all('div'):
    text, title = div.string, div.get('title')

    # The div's string content is always recorded (it may be None)
    div_list.append(text)

    # The title attribute is recorded only when it differs from the content,
    # so a label that appears in both places is not stored twice
    if text != title:
        div_list.append(title)

In [12]:
# ## Filter out irrelevant data
# In a single pass keep only entries that are non-empty, are not one of the
# excluded labels ('Operating Expenses', 'Non-recurring Events', 'Expand All'),
# and are not inline script text beginning with '(function'.
div_list = [
    entry for entry in div_list
    if entry
    and entry not in ('Operating Expenses', 'Non-recurring Events', 'Expand All')
    and not entry.startswith('(function')
]

# Drop the first 13 and the last 5 surviving entries and put "Breakdown" in
# front so the list starts with the label of the header row.
# NOTE(review): the 13 / -5 offsets are presumably tied to the page layout at
# the time of writing -- confirm they still bracket the statement table.
balancesheet_list = ['Breakdown'] + div_list[13: -5]

In [13]:
# ## Build the balance sheet DataFrame

# Cut the flat list into consecutive 5-item tuples by zipping five strided
# slices; a trailing group with fewer than 5 items is dropped.
balancesheet_data = list(zip(*(balancesheet_list[offset::5] for offset in range(5))))

balancesheet_df = pd.DataFrame(balancesheet_data)

# Promote the first row to column labels and keep the remaining rows as data
headers = balancesheet_df.iloc[0]
balancesheet_df = balancesheet_df.iloc[1:]
balancesheet_df.columns = headers

# Use the row labels in the 'Breakdown' column as the index
balancesheet_df = balancesheet_df.set_index('Breakdown')

warnings.warn('Amounts are in thousands.')

print(balancesheet_df)

0                                        12/31/2019  12/31/2018  12/31/2017  \
Breakdown                                                                     
Total Assets                             34,309,000  29,739,614  28,655,372   
Total Liabilities Net Minority Interest  26,842,000  23,981,974  23,420,784   
Total Equity Gross Minority Interest      7,467,000   5,757,640   5,234,588   
Total Capitalization                     18,252,000  14,326,915  13,655,631   
Common Stock Equity                       6,618,000   4,923,243   4,237,242   
Capital Lease Obligations                 1,050,000   1,855,900   1,801,010   
Net Tangible Assets                       6,081,000   4,572,592   3,815,503   
Working Capital                           1,436,000  -1,685,828  -1,104,150   
Invested Capital                         20,037,000  16,894,614  14,552,180   
Tangible Book Value                       6,081,000   4,572,592   3,815,503   
Total Debt                               14,469,000 

  from ipykernel import kernelapp as app


In [14]:
# End yahoo_balance_sheet(ticker):
# =============================================================================

In [15]:
# Begin yahoo_cash_flow(ticker)
# =============================================================================

In [16]:
# ## Read the Yahoo Finance cash flow page
cashflow_url = f'https://finance.yahoo.com/quote/{ticker}/cash-flow?p={ticker}'

# Open the response as a context manager so the underlying HTTP connection
# is closed once the body has been read, even if read() raises.
with ur.urlopen(cashflow_url) as response:
    read_url = response.read()

# Parse the downloaded HTML using BeautifulSoup's lxml parser
cashflow_soup = BeautifulSoup(read_url, 'lxml')

In [17]:
# ## Collect the text and title of every <div> on the cash flow page
div_list = []

for div in cashflow_soup.find_all('div'):
    text, title = div.string, div.get('title')

    # The div's string content is always recorded (it may be None)
    div_list.append(text)

    # The title attribute is recorded only when it differs from the content,
    # so a label that appears in both places is not stored twice
    if text != title:
        div_list.append(title)

In [18]:
# ## Filter out irrelevant data
# In a single pass keep only entries that are non-empty, are not one of the
# excluded labels ('Operating Expenses', 'Non-recurring Events', 'Expand All'),
# and are not inline script text beginning with '(function'.
div_list = [
    entry for entry in div_list
    if entry
    and entry not in ('Operating Expenses', 'Non-recurring Events', 'Expand All')
    and not entry.startswith('(function')
]

# Drop the first 13 and the last 5 surviving entries and put "Breakdown" in
# front so the list starts with the label of the header row.
# NOTE(review): the 13 / -5 offsets are presumably tied to the page layout at
# the time of writing -- confirm they still bracket the statement table.
cashflow_list = ['Breakdown'] + div_list[13: -5]


In [19]:
# ## Build the cash flow DataFrame

# Cut the flat list into consecutive 6-item tuples by zipping six strided
# slices; a trailing group with fewer than 6 items is dropped.
cashflow_data = list(zip(*(cashflow_list[offset::6] for offset in range(6))))

cashflow_df = pd.DataFrame(cashflow_data)

# Promote the first row to column labels and keep the remaining rows as data
headers = cashflow_df.iloc[0]
cashflow_df = cashflow_df.iloc[1:]
cashflow_df.columns = headers

# Use the row labels in the 'Breakdown' column as the index
cashflow_df = cashflow_df.set_index('Breakdown')

warnings.warn('Amounts are in thousands.')

print(cashflow_df)

0                                          ttm  12/31/2019  12/31/2018  \
Breakdown                                                                
Operating Cash Flow                  2,705,000   2,405,000   2,097,802   
Investing Cash Flow                 -1,935,000  -1,436,000  -2,337,428   
Financing Cash Flow                  2,870,000   1,529,000     573,755   
End Cash Position                    9,089,000   6,783,000   4,276,388   
Income Tax Paid Supplemental Data            -      54,000      35,409   
Interest Paid Supplemental Data              -     455,000     380,836   
Capital Expenditure                 -1,906,000  -1,437,000  -2,319,516   
Issuance of Capital Stock            2,309,000     848,000           0   
Issuance of Debt                    10,607,000  10,669,000   6,176,173   
Repayment of Debt                  -10,357,000  -9,871,000  -6,087,029   
Free Cash Flow                         799,000     968,000    -221,714   

0                                  12

  from ipykernel import kernelapp as app


In [20]:
# End yahoo_cash_flow(ticker)
# =============================================================================