# Historical Stock Prices

We need to get the stock prices of all the companies for which we have fundamental data.

We will use this information to compute returns, and fundamental metrics.

We will relay on the package YahooFinancials in order to get the ticker prices.

In [1]:
import IPython
IPython.auto_scroll_threshold = 9999

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from IPython.display import display, HTML

%load_ext autoreload
%autoreload 2
%load_ext autotime

In [2]:
from os import path
from yahoofinancials import YahooFinancials
import pandas as pd

from json_utils import write_to_gzip_file, load_from_gzip_file

time: 1.04 s


In [3]:
# Load all the companies tickers and pivot by ticker_type
original_tickers_df = pd.read_csv('tickers.csv', header=None, names=["ccvm", "ticker", "ticker_type"])
tickers_df = pd.pivot(original_tickers_df, index='ccvm', columns='ticker_type', values='ticker')
tickers_df = tickers_df.add_prefix("ticker_type_")
tickers_df

ticker_type,ticker_type_3,ticker_type_4,ticker_type_5,ticker_type_6
ccvm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
94,PATI3,PATI4,,
531,ARTR3,,,
574,ARLA3,ARLA4,,
701,BAHI3,,,
906,BBDC3,BBDC4,,
...,...,...,...,...
23612,MSRO3,,,
23710,HCBR3,,,
23728,PRCA3,,,
24783,NTCO3,,,


time: 72.3 ms


In [4]:
tickers_df = original_tickers_df

time: 2.88 ms


## Ticker Types

In the cash market, tickers are composed by four letters, a number, and a suffix in some cases. The letters stand for the listed company and the number disclosed the equity type, as follows

|  Number |  Class | Trade name indication
|---|---|---|
| 3 | common share | ON (''ordinária nominativa'') |
| 4 | preferred share | PN (''preferencial nominativa'') |
| 5 | preferred share class A | PNA |
| 6 | preferred share class B | PNB |

In order to get the stock prices, we are going to start with the ON ticker, and then try with the PN, PNA, and PNB.

In [5]:
# Initialize DSE Cluster connection
try:
    from dse.cluster import Cluster
except ImportError:
    from cassandra.cluster import Cluster

cluster = Cluster(['tfm_uoc_dse'])  # provide contact points and port
session = cluster.connect('tfm_uoc')

time: 209 ms


In [6]:
original_companies_df = session.execute("select ccvm, company_name, cnpj from bovespa_company;")
original_companies_df = pd.DataFrame(
    [{
        "ccvm": int(x.ccvm), 
        "company_name": x.company_name,
        "cnpj": x.cnpj} 
            for x in list(original_companies_df)])

time: 92.4 ms


In [7]:
from companies import get_companies_accounts

# Obtain all the companies with fundamental data 
companies_with_fundamentals_df = get_companies_accounts(session)
display("Total number of companies: {}".format(len(companies_with_fundamentals_df)))
companies_with_fundamentals_df.head(10)

'Total number of companies: 877'

Unnamed: 0,ccvm,num_accounts
0,11070,48111
1,21067,44244
2,20010,43019
3,20931,42939
4,2437,41730
5,22020,41496
6,6173,41192
7,15253,40267
8,21636,39393
9,7811,38559


time: 1.11 s


In [8]:
# Compute the ticker for each company (CCVM)
companies_df = pd.merge(left=companies_with_fundamentals_df, 
                        right=original_companies_df, 
                        how='left', 
                        left_on='ccvm', 
                        right_on='ccvm')

companies_df = pd.merge(left=companies_df, 
                        right=tickers_df, 
                        how='left', 
                        left_on='ccvm', 
                        right_on='ccvm')

companies_df.head(10)

Unnamed: 0,ccvm,num_accounts,company_name,cnpj,ticker,ticker_type
0,11070,48111,WLM INDÚSTRIA E COMÉRCIO S.A.,33.228.024/0001-51,SGAS3,3.0
1,11070,48111,WLM INDÚSTRIA E COMÉRCIO S.A.,33.228.024/0001-51,SGAS4,4.0
2,21067,44244,MOURA DUBEUX ENGENHARIA S/A,12.049.631/0001-84,,
3,20010,43019,EQUATORIAL ENERGIA S/A,03.220.438/0001-73,EQTL3,3.0
4,20931,42939,MINERVA S/A,67.620.377/0001-14,BEEF3,3.0
5,2437,41730,CENTRAIS ELETRICAS BRASILEIRAS SA,00.001.180/0001-26,ELET3,3.0
6,2437,41730,CENTRAIS ELETRICAS BRASILEIRAS SA,00.001.180/0001-26,ELET5,5.0
7,22020,41496,JSL S.A.,52.548.435/0001-79,JSLG3,3.0
8,6173,41192,TAURUS ARMAS S.A.,92.781.335/0001-02,FJTA4,4.0
9,6173,41192,TAURUS ARMAS S.A.,92.781.335/0001-02,FJTA3,3.0


time: 22.4 ms


In [12]:
companies_no_ticker_df = companies_df[(pd.isnull(companies_df["ticker"]))]
companies_no_ticker_df = companies_no_ticker_df.sort_values(['ccvm'], ascending=[1])
print(f"Companies without Ticker Info: {companies_no_ticker_df.count()}")

Companies without Ticker Info: ccvm            493
num_accounts    493
company_name    493
cnpj            493
ticker            0
ticker_type       0
dtype: int64
time: 16.5 ms


In [13]:
companies_ticker_df = companies_df[(pd.notnull(companies_df["ticker"]))]
companies_ticker_df = companies_ticker_df.sort_values(['ccvm'], ascending=[1])
print(f"Companies with Ticker Info: {companies_ticker_df.count()}")

Companies with Ticker Info: ccvm            558
num_accounts    558
company_name    558
cnpj            558
ticker          558
ticker_type     558
dtype: int64
time: 11.1 ms


In [23]:
if path.exists("downloaded_financials.json.gz"):
    company_stock_prices = load_from_gzip_file("downloaded_financials.json.gz")
else:
    company_stock_prices = {}

time: 7.27 s


In [24]:
processed_stocks = company_stock_prices.keys()
# for index, ticker in enumerate(tickers.to_list()):
for index, row in companies_ticker_df.iterrows():
    ticker = row["ticker"]
    if ticker not in processed_stocks:
        print(f"Getting ticker [{index}]: {ticker}.SA")
        yahoo_financials = YahooFinancials(f"{ticker}.SA")
        historical_stock_prices = yahoo_financials.get_historical_price_data('2000-01-01', '2020-01-07', 'daily')
        company_stock_prices[ticker] = historical_stock_prices[f"{ticker}.SA"]
    else:
        print(f"[{index}]: {ticker}.SA already processed...")

[264]: PATI4.SA already processed...
[263]: PATI3.SA already processed...
[237]: BAHI3.SA already processed...
[245]: BBDC4.SA already processed...
[246]: BBDC3.SA already processed...
[392]: BAZA3.SA already processed...
[137]: BBAS3.SA already processed...
[274]: BGIP4.SA already processed...
[275]: BGIP3.SA already processed...
[226]: BEES3.SA already processed...
[227]: BEES4.SA already processed...
[861]: BPAR3.SA already processed...
[311]: BRSR3.SA already processed...
[312]: BRSR5.SA already processed...
[429]: BNBR3.SA already processed...
[660]: BMIN4.SA already processed...
[659]: BMIN3.SA already processed...
[277]: BMEB4.SA already processed...
[276]: BMEB3.SA already processed...
[342]: BRIV3.SA already processed...
[343]: BRIV4.SA already processed...
[119]: BDLL4.SA already processed...
[120]: BDLL3.SA already processed...
[364]: BALM4.SA already processed...
[363]: BALM3.SA already processed...
[481]: BAUH4.SA already processed...
[480]: BAUH3.SA already processed...
[

In [25]:
if not path.exists("downloaded_financials.json.gz"):
    write_to_gzip_file('downloaded_financials.json.gz', company_stock_prices)
else:
    print("donwloaded financials file already exists.")

donwloaded financials file already exists.
time: 3.03 ms


In [28]:
# Generate the dataset with the historical prices for all the tickers.
stock_prices = []
tickers_with_prices = []
ccvm_by_ticker = {}
for ticker, historical_data in company_stock_prices.items():
    if "prices" in historical_data and len(historical_data["prices"]) > 0:
        tickers_with_prices.append(ticker)
        for stock_price in historical_data["prices"]:
            data = stock_price.copy()
            data["ticker"] = ticker
            
            if "instrumentType" not in historical_data:
                data["type"] = "UNKNOWN"
            else:
                data["type"] = historical_data["instrumentType"]

            # Get the CCVM associated to the ticker
            if ticker not in ccvm_by_ticker:
                ticker_ccvm = companies_ticker_df[companies_ticker_df.ticker == ticker].ccvm
                ccvm = ticker_ccvm.loc[ticker_ccvm.index[0]]
                ccvm_by_ticker[ticker] = ccvm
                            
            data["ccvm"] = ccvm_by_ticker[ticker]
            
            stock_prices.append(data)
                                   
stock_prices_df = pd.DataFrame(stock_prices)
# Convert the 'formatted_date' column type from string to datetime
stock_prices_df['formatted_date'] =  pd.to_datetime(stock_prices_df['formatted_date'], format='%Y-%m-%d')            

time: 4.9 s


In [29]:
print(f"We have {len(tickers_with_prices)} with historic stock prices")

We have 401 with historic stock prices
time: 2.29 ms


In [30]:
# Save the dataset as a CSV file
stock_prices_df.to_csv('stock_prices.csv.gz', compression='gzip')

time: 35 s


In [31]:
stock_prices_df = pd.read_csv('stock_prices.csv.gz', parse_dates=['date'])
stock_prices_df

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0.1,Unnamed: 0,date,high,low,open,close,volume,adjclose,formatted_date,ticker,type,ccvm,numerator,denominator,splitRatio,data,amount
0,0,1199275200,6.498830,6.498830,6.498830,6.498830,215.0,3.810863,2008-01-02,PATI4,EQUITY,94,,,,,
1,1,1199361600,,,,,,,2008-01-03,PATI4,EQUITY,94,,,,,
2,2,1199448000,,,,,,,2008-01-04,PATI4,EQUITY,94,,,,,
3,3,1199707200,,,,,,,2008-01-07,PATI4,EQUITY,94,,,,,
4,4,1199793600,,,,,,,2008-01-08,PATI4,EQUITY,94,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1189745,1189745,1577448000,29.770000,28.860001,29.520000,28.870001,418100.0,28.700651,2019-12-27,VIVA3,EQUITY,24805,,,,,
1189746,1189746,1577707200,29.219999,28.719999,28.940001,28.969999,252600.0,28.969999,2019-12-30,VIVA3,EQUITY,24805,,,,,
1189747,1189747,1577966400,29.570000,28.940001,29.290001,29.100000,490200.0,29.100000,2020-01-02,VIVA3,EQUITY,24805,,,,,
1189748,1189748,1578052800,29.250000,28.129999,29.000000,29.000000,1128400.0,29.000000,2020-01-03,VIVA3,EQUITY,24805,,,,,


time: 3.18 s


In [32]:
# Generate the dataset with the historical events for all the tickers.
ticker_events = []
for ticker, historical_data in company_stock_prices.items():
    if "eventsData" in historical_data:        
        if "dividends" in historical_data["eventsData"] and len(historical_data["eventsData"]["dividends"]) > 0:
            for date, dividend in historical_data["eventsData"]["dividends"].items():
                data = dividend.copy()
                data["type"] = "DIVIDEND"
                data["ticker"] = ticker
                ticker_events.append(data)

        if "splits" in historical_data["eventsData"] and len(historical_data["eventsData"]["splits"]) > 0:
            for date, split in historical_data["eventsData"]["splits"].items():
                data = split.copy()
                data["type"] = "SPLIT"                
                data["ticker"] = ticker
                ticker_events.append(data)

ticker_events_df = pd.DataFrame(ticker_events)
# Convert the 'formatted_date' column type from string to datetime
ticker_events_df['formatted_date'] =  pd.to_datetime(ticker_events_df['formatted_date'], format='%Y-%m-%d')            

time: 48.8 ms


In [33]:
ticker_events_df.head(10)

Unnamed: 0,amount,date,formatted_date,type,ticker,numerator,denominator,splitRatio
0,1.12487,1430312400,2015-04-29,DIVIDEND,PATI4,,,
1,1.15168,1461762000,2016-04-27,DIVIDEND,PATI4,,,
2,1.14342,1493298000,2017-04-27,DIVIDEND,PATI4,,,
3,1.3918,1554382800,2019-04-04,DIVIDEND,PATI4,,,
4,1.06606,1365685200,2013-04-11,DIVIDEND,PATI4,,,
5,1.18953,1523019600,2018-04-06,DIVIDEND,PATI4,,,
6,0.42376,1272546000,2010-04-29,DIVIDEND,PATI4,,,
7,1.00381,1335445200,2012-04-26,DIVIDEND,PATI4,,,
8,1.34149,1398430800,2014-04-25,DIVIDEND,PATI4,,,
9,0.962588,1304341200,2011-05-02,DIVIDEND,PATI4,,,


time: 23.3 ms


In [34]:
# Save the dataset as a CSV file
ticker_events_df.to_csv('ticker_events.csv')

time: 182 ms


In [35]:
# Generate the dataset with the ticker details.
ticker_details = []
for ticker, historical_data in company_stock_prices.items():
    ticker_data = {
        "ticker": ticker        
    }
    
    if "firstTradeDate" in historical_data: 
        ticker_data["first_trade_date"] = historical_data["firstTradeDate"]["date"]
        ticker_data["first_trade_formatted_date"] = pd.to_datetime(
            historical_data["firstTradeDate"]["formatted_date"], 
            format='%Y-%m-%d')
        
    if "currency" in historical_data:
        ticker_data["currency"] = historical_data["currency"]

    if "instrumentType" in historical_data:
        ticker_data["instrument_type"] = historical_data["instrumentType"]

    if "timeZone" in historical_data:
        ticker_data["time_zone_gmt_offset"] = historical_data["timeZone"]["gmtOffset"]
    
    ticker_details.append(ticker_data)

ticker_details_df = pd.DataFrame(ticker_details)

time: 180 ms


In [36]:
ticker_details_df.head(10)

Unnamed: 0,ticker,first_trade_date,first_trade_formatted_date,currency,instrument_type,time_zone_gmt_offset
0,PATI4,1199274000.0,2008-01-02,BRL,EQUITY,-7200.0
1,PATI3,948887100.0,2000-01-26,BRL,EQUITY,-7200.0
2,BAHI3,1021985000.0,2002-05-21,BRL,EQUITY,-7200.0
3,BBDC4,1199274000.0,2008-01-02,BRL,EQUITY,-7200.0
4,BBDC3,946899900.0,2000-01-03,BRL,EQUITY,-7200.0
5,BAZA3,946899900.0,2000-01-03,BRL,EQUITY,-7200.0
6,BBAS3,946899900.0,2000-01-03,BRL,EQUITY,-7200.0
7,BGIP4,1199274000.0,2008-01-02,BRL,EQUITY,-7200.0
8,BGIP3,947072700.0,2000-01-05,BRL,EQUITY,-7200.0
9,BEES3,1559652000.0,2019-06-04,BRL,EQUITY,-7200.0


time: 30.4 ms


In [37]:
# Save the dataset as a CSV file
ticker_details_df.to_csv('ticker_details.csv')

time: 128 ms


In [38]:
print(f"Total data points: {stock_prices_df.count()}")

Total data points: Unnamed: 0        1189750
date              1189750
high              1091492
low               1091492
open              1091492
close             1091492
volume            1091492
adjclose          1091492
formatted_date    1189750
ticker            1189750
type              1189750
ccvm              1189750
numerator               2
denominator             2
splitRatio              2
data                    6
amount                  4
dtype: int64
time: 198 ms


In [39]:
stock_prices_aggr_df = stock_prices_df.groupby(["ticker"]).count()
stock_prices_aggr_df.mean()

Unnamed: 0        2966.957606
date              2966.957606
high              2721.925187
low               2721.925187
open              2721.925187
close             2721.925187
volume            2721.925187
adjclose          2721.925187
formatted_date    2966.957606
type              2966.957606
ccvm              2966.957606
numerator            0.004988
denominator          0.004988
splitRatio           0.004988
data                 0.014963
amount               0.009975
dtype: float64

time: 273 ms


# Bovespa Index

We need to get the history index price of the index of reference of Bovespa, IBOV.

This index is accessible through the ticker: __^BVSP__.

We will save the result in the file: __ibov.csv__.

In [40]:
finance = YahooFinancials(f"^BVSP")
ibov_data = finance.get_historical_price_data('2000-01-01', '2020-01-03', 'daily')

time: 809 ms


In [41]:
# Generate the dataset with the historical prices for all the tickers.
ibov_prices = []
for ticker, historical_data in ibov_data.items():
    if "prices" in historical_data and len(historical_data["prices"]) > 0:
        for index_price in historical_data["prices"]:
            data = index_price.copy()
            data["ticker"] = ticker
            data["ccvm"] = 'BVSP'
            data["type"] = "INDEX"
            ibov_prices.append(data)
                        
ibov_prices_df = pd.DataFrame(ibov_prices)
# Convert the 'formatted_date' column type from string to datetime
ibov_prices_df['formatted_date'] =  pd.to_datetime(ibov_prices_df['formatted_date'], format='%Y-%m-%d')            

time: 16.9 ms


In [42]:
ibov_prices_df.head(10)

Unnamed: 0,date,high,low,open,close,volume,adjclose,formatted_date,ticker,ccvm,type
0,946900800,17408.0,16719.0,17098.0,16930.0,0.0,16930.0,2000-01-03,^BVSP,BVSP,INDEX
1,946987200,16908.0,15851.0,16908.0,15851.0,0.0,15851.0,2000-01-04,^BVSP,BVSP,INDEX
2,947073600,16302.0,15350.0,15871.0,16245.0,0.0,16245.0,2000-01-05,^BVSP,BVSP,INDEX
3,947160000,16499.0,15977.0,16237.0,16107.0,0.0,16107.0,2000-01-06,^BVSP,BVSP,INDEX
4,947246400,16449.0,16125.0,16125.0,16309.0,0.0,16309.0,2000-01-07,^BVSP,BVSP,INDEX
5,947505600,17057.0,16325.0,16325.0,17022.0,0.0,17022.0,2000-01-10,^BVSP,BVSP,INDEX
6,947592000,17197.0,16573.0,17045.0,16573.0,0.0,16573.0,2000-01-11,^BVSP,BVSP,INDEX
7,947678400,16724.0,16331.0,16574.0,16617.0,0.0,16617.0,2000-01-12,^BVSP,BVSP,INDEX
8,947764800,17715.0,16663.0,16663.0,17298.0,0.0,17298.0,2000-01-13,^BVSP,BVSP,INDEX
9,947851200,17800.0,17306.0,17306.0,17658.0,0.0,17658.0,2000-01-14,^BVSP,BVSP,INDEX


time: 11.8 ms


In [43]:
# Save the dataset as a CSV file
ibov_prices_df.to_csv('ibov_prices.csv')

time: 79.1 ms


## Move data to DSE Node (Cassandra)

In [44]:
import IPython
IPython.auto_scroll_threshold = 9999

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from IPython.display import display, HTML

%load_ext autoreload
%autoreload 2
%load_ext autotime

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 3.2 ms


In [45]:
import pandas as pd
import numpy as np
from db import sync_table
from pyspark.sql.types import *

from spark import init_spark_context, load_and_get_table_df
from companies import get_companies_with_tickers_and_fundamentals


sc, sql_context = init_spark_context("History Prices Job")

time: 3.17 s


In [46]:
stock_prices_df = pd.read_csv('stock_prices.csv.gz', parse_dates=['formatted_date'])
stock_prices_df = stock_prices_df[
    ["type", "ticker", "ccvm", "formatted_date", "high", "low", "open", "close", "adjclose", "volume"]]
stock_prices_df

Unnamed: 0,type,ticker,ccvm,formatted_date,high,low,open,close,adjclose,volume
0,EQUITY,PATI4,94,2008-01-02,6.498830,6.498830,6.498830,6.498830,3.810863,215.0
1,EQUITY,PATI4,94,2008-01-03,,,,,,
2,EQUITY,PATI4,94,2008-01-04,,,,,,
3,EQUITY,PATI4,94,2008-01-07,,,,,,
4,EQUITY,PATI4,94,2008-01-08,,,,,,
...,...,...,...,...,...,...,...,...,...,...
1189745,EQUITY,VIVA3,24805,2019-12-27,29.770000,28.860001,29.520000,28.870001,28.700651,418100.0
1189746,EQUITY,VIVA3,24805,2019-12-30,29.219999,28.719999,28.940001,28.969999,28.969999,252600.0
1189747,EQUITY,VIVA3,24805,2020-01-02,29.570000,28.940001,29.290001,29.100000,29.100000,490200.0
1189748,EQUITY,VIVA3,24805,2020-01-03,29.250000,28.129999,29.000000,29.000000,29.000000,1128400.0


time: 3.04 s


In [47]:
stock_prices_df = pd.read_csv('stock_prices.csv.gz', parse_dates=['formatted_date'])
stock_prices_df = stock_prices_df[
    ["type", "ticker", "ccvm", "formatted_date", "high", "low", "open", "close", "adjclose", "volume"]]
stock_prices_df.columns = ["type", "ticker", "ccvm", "date", "high", "low", "open", "close", "adjclose", "volume"]

index_reference_df = pd.read_csv('ibov_prices.csv', parse_dates=['formatted_date'])
index_reference_df = index_reference_df[
    ["type", "ticker", "ccvm", "formatted_date", "high", "low", "open", "close", "adjclose", "volume"]]
index_reference_df.columns = ["type", "ticker", "ccvm", "date", "high", "low", "open", "close", "adjclose", "volume"]

risk_free_rate_df = pd.read_csv('Brazil_10year_bond_yield_historical_data.csv', parse_dates=['Date'])
risk_free_rate_df.columns = ["date", "close", "open", "high", "low", "change"]
risk_free_rate_df.drop(columns=["change"], inplace=True)
risk_free_rate_df.insert(0, "ccvm", "GEBR10Y")
risk_free_rate_df.insert(0, "ticker", "GEBR10Y")
risk_free_rate_df.insert(0, "type", "DEBT")
risk_free_rate_df["adjclose"] = risk_free_rate_df["close"]
risk_free_rate_df["volume"] = np.nan
risk_free_rate_df = risk_free_rate_df[["type", "ticker", "ccvm", "date", "high", "low", "open", "close", "adjclose", "volume"]]

time: 5.87 s


In [48]:
security_prices_df = pd.concat([stock_prices_df, index_reference_df, risk_free_rate_df])

time: 93.8 ms


In [49]:
prices_schema = StructType([
    StructField("type", StringType(), True),
    StructField("ticker", StringType(), True),
    StructField("ccvm", StringType(), True),    
    StructField("date", DateType(), True),
    StructField("high", FloatType(), True),
    StructField("low", FloatType(), True),
    StructField("open", FloatType(), True),
    StructField("close", FloatType(), True),
    StructField("adjclose", FloatType(), True),
    StructField("volume", FloatType(), True)])   

security_prices_spark_df = sql_context.createDataFrame(
    security_prices_df,
    schema=prices_schema)

time: 23.5 s


In [50]:
sync_table(security_prices_spark_df, "tfm_uoc_dse", "tfm_uoc_analysis", "security_prices", ["ccvm", "ticker", "date"])

Closing connections
time: 185 ms




In [51]:
security_prices_spark_df.write\
    .format("org.apache.spark.sql.cassandra")\
    .options(table="security_prices", keyspace="tfm_uoc_analysis")\
    .option("confirm.truncate","true")\
    .mode("overwrite")\
    .partitionBy("astodate")\
    .save()

time: 3min 17s


In [52]:
companies_tickers_df = get_companies_with_tickers_and_fundamentals(session)

companies_tickers_df = companies_tickers_df[["ticker", "ccvm", "company_name", "cnpj", "ticker_type", "num_accounts"]]

tickers_schema = StructType([
    StructField("ticker", StringType(), True),
    StructField("ccvm", StringType(), True),
    StructField("company_name", StringType(), True),    
    StructField("cnpj", StringType(), True),    
    StructField("ticker_type", FloatType(), True),    
    StructField("num_accounts", IntegerType(), True)])   

companies_tickers_spark_df = sql_context.createDataFrame(
    companies_tickers_df,
    schema=tickers_schema)

  if (await self.run_code(code, result,  async_=asy)):


time: 6.66 s


In [53]:
sync_table(companies_tickers_spark_df, "tfm_uoc_dse", "tfm_uoc_analysis", "company_tickers", ["ticker"])

Closing connections
time: 2.04 s


In [54]:
companies_tickers_spark_df.write\
    .format("org.apache.spark.sql.cassandra")\
    .options(table="company_tickers", keyspace="tfm_uoc_analysis")\
    .option("confirm.truncate","true")\
    .mode("overwrite")\
    .partitionBy("astodate")\
    .save()

time: 2.81 s


In [55]:
sc.stop()

time: 967 ms
