In [1]:
try:
    from dse.cluster import Cluster
except ImportError:
    from cassandra.cluster import Cluster

# Open the driver connection: 'tfm_uoc_dse' is the only contact point given
# (no port is passed) and the session is bound to the 'tfm_uoc' keyspace.
cluster = Cluster(contact_points=['tfm_uoc_dse'])
session = cluster.connect(keyspace='tfm_uoc')

import pandas as pd

In [2]:
from dateutil.parser import parse as parse_date

In [9]:
# Fetch the identifying columns of every company, order the rows by company
# name on the client side, and load them into a DataFrame.
companies = session.execute("select ccvm, company_name, cnpj from bovespa_company;")
companies_by_name = sorted(companies, key=lambda row: row.company_name)
companies_df = pd.DataFrame(companies_by_name)

companies_df.head(10)

Unnamed: 0,ccvm,company_name,cnpj
0,21954,3A COMPANHIA SECURITIZADORA,11.396.633/0001-87
1,16330,521 PARTICIPAÇOES S.A. - EM LIQUIDAÇÃO EXTRAJU...,01.547.749/0001-16
2,16284,524 PARTICIPAÇOES SA,01.851.771/0001-55
3,16349,525 PARTICIPAÇOES SA,01.919.008/0001-19
4,505480,A C M AGRICOLA SA,31.694.680/0001-14
5,502316,A G HOTEIS E TURISMO SA,08.690.448/0001-96
6,35,A J RENNER SA IND E PARTIP,92.659.614/0001-06
7,504076,A M FIDALGO SA MAT DE CONSTR,04.895.165/0001-20
8,503592,A O GASPAR INDS SA,06.932.909/0001-64
9,16802,A.P. PARTICIPAÇOES SA,02.288.752/0001-25


In [8]:
import json

# Facet every row of bovespa_company_file on the "ccvm_exact" field so that a
# single query returns the number of matching rows per company code.
solr_query = {
    "q": "*:*",
    "facet": {
        "field": "ccvm_exact",
        "limit": 10000
    }
}

# Bind the JSON document as a statement parameter (the same mechanism
# show_accounts uses) instead of splicing it into the CQL text, so that
# quoting/escaping of the value is handled by the driver.
fields_per_company_query = \
    "select * from bovespa_company_file WHERE solr_query=%s"

# Keep each intermediate under its own name rather than rebinding
# files_per_company to a row, then a dict, then a DataFrame.
facet_row = session.execute(
    fields_per_company_query, parameters=[json.dumps(solr_query)]).one()
facet_counts = json.loads(facet_row.facet_fields)["ccvm_exact"]

# NOTE(review): each facet count is divided by 4 before rounding — presumably
# every file contributes 4 indexed rows; confirm against the table schema.
files_per_company = pd.DataFrame([{
    "type": "Companies",
    "ccvm": ccvm,
    "num_files": round(num_files / 4, 0)}
        for ccvm, num_files in facet_counts.items()])
display("Total number of companies: {}".format(len(files_per_company)))

files_per_company.head(10)

'Total number of companies: 9'

Unnamed: 0,type,ccvm,num_files
0,Companies,11070,16.0
1,Companies,1023,14.0
2,Companies,10880,14.0
3,Companies,10456,12.0
4,Companies,10960,12.0
5,Companies,10472,10.0
6,Companies,10561,9.0
7,Companies,10596,6.0
8,Companies,5150,4.0


In [10]:
import altair as alt

# Histogram of the number of files per company (x axis hidden) overlaid with
# a thick red rule at the mean number of files.
files_chart = alt.Chart(files_per_company)

bar = files_chart.mark_bar().encode(
    x=alt.X('num_files:Q', bin=True, axis=None),
    y=alt.Y('count()'),
)

rule = files_chart.mark_rule(color='red').encode(
    x='mean(num_files):Q',
    size=alt.value(5),
)

bar + rule

In [11]:
def boxplot_altair(data, x, y, xtype='N', ytype='Q',
                   size=40, width=400):
    """
    Build a box plot in Altair by layering four charts over ``data``.

    The layers are, in order: a rule from min to q1, a bar from q1 to q3,
    a rule from max to q3, and a white tick at the median.

    Parameters:
        data: passed unchanged to ``alt.Chart`` for every layer.
        x: field name used for the x encoding.
        y: field name whose aggregates (min, q1, median, q3, max) are drawn;
            also used as the y axis title.
        xtype: Altair type code appended to ``x`` (default 'N').
        ytype: Altair type code appended to each aggregate of ``y``
            (default 'Q').
        size: size of the box bar and of the median tick.
        width: width property set on the two rule layers and the bar layer.

    Returns:
        The chart produced by adding the four layers together.
    """
    def stat(aggregate):
        # Altair shorthand for one aggregate of the y field, e.g. 'q1(col):Q'.
        return f'{aggregate}({y}):{ytype}'

    x_field = f'{x}:{xtype}'
    first_quartile = stat('q1')
    third_quartile = stat('q3')

    # Lower whisker: min -> q1. This layer also carries the y axis title.
    whisker_low = (
        alt.Chart(data)
        .mark_rule()
        .encode(
            x=x_field,
            y=alt.Y(stat('min'), axis=alt.Axis(title=y)),
            y2=first_quartile,
        )
        .properties(width=width)
    )

    # Box: q1 -> q3.
    box = (
        alt.Chart(data)
        .mark_bar(size=size)
        .encode(x=x_field, y=first_quartile, y2=third_quartile)
        .properties(width=width)
    )

    # Upper whisker: max -> q3.
    whisker_high = (
        alt.Chart(data)
        .mark_rule()
        .encode(x=x_field, y=stat('max'), y2=third_quartile)
        .properties(width=width)
    )

    # Median marker drawn in white on top of the box.
    median_mark = (
        alt.Chart(data)
        .mark_tick(color='white', size=size)
        .encode(x=x_field, y=stat('median'))
    )

    # Layer order matters: later layers are added on top of earlier ones.
    return whisker_low + box + whisker_high + median_mark

In [12]:
# Box plot of num_files. Passing '' as x makes the x encoding ':N' (no field
# name) — presumably so that all rows fall into a single box; confirm.
boxplot_altair(files_per_company, '', 'num_files', width=200)

In [13]:
def show_accounts(company_ccvm, balance_type, financial_info_type, period):
    """
    Query bovespa_account for one company/statement/period and display it.

    The result is also stored in the module-level ``accounts`` variable: a
    DataFrame of the returned rows, or None when nothing could be shown.

    Parameters:
        company_ccvm: value matched against the ``ccvm`` column.
        balance_type: value matched against the ``balance_type`` column.
        financial_info_type: value matched against the
            ``financial_info_type`` column.
        period: date-like object formatted as YYYY-MM-DD for the ``period``
            column, or None when no date has been selected.
    """
    global accounts

    print(company_ccvm)

    if period is None:
        # The date picker driving this callback yields None once its value is
        # cleared; formatting None with a date format spec raises TypeError,
        # so bail out before building the query parameters.
        accounts = None
        display("No fiscal period selected")
        return

    params = [company_ccvm, balance_type, financial_info_type, "{:%Y-%m-%d}".format(period)]
    # Materialize the result once so it can be both tested for emptiness and
    # handed to pandas.
    rows = list(session.execute(
        "SELECT number, name, financial_info_type, balance_type, amount, comments FROM bovespa_account WHERE ccvm = %s AND balance_type = %s AND financial_info_type = %s AND period = %s;",
        parameters=params))
    if rows:
        # Raises the pandas row display limit globally so long statements are
        # shown in full.
        pd.set_option('display.max_rows', 200)
        accounts = pd.DataFrame(rows)
        display(accounts[["number", "name", "amount", "comments"]])
    else:
        accounts = None
        display("No information available")

In [14]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

In [15]:
# Each dropdown entry is built from a (company_name, ccvm) row of the
# companies DataFrame.
subset = companies_df[['company_name', 'ccvm']]
companies_df_subset = subset
companies_options = [tuple(pair) for pair in companies_df_subset.values]

style = {'description_width': 'initial'}

# Keyword arguments shared by every control below.
common_widget_kwargs = dict(disabled=False, style=style)

company_ccvm_sel = widgets.Dropdown(
    options=companies_options,
    description='Company CCVM:',
    **common_widget_kwargs
)

balance_type_options = [
    ("Capital (only Accumulated)", "IF"),
    ("Balance Sheet - Assets", "ASSETS"),
    ("Balance Sheet - Liabilities", "LIABILITIES"),
    ("Income Statement (P&L)", "DRE"),
    ("Comprehensive Income", "DRA"),
    ("Cash Flow Statement (Direct Method)", "DFC_MD"),
    ("Cash Flow Statement (Indirect Method)", "DFC_MI"),
    ("Equity and Cash", "DMPL"),
    ("Statement of Added Value", "DVA"),
]

balance_type_sel = widgets.Dropdown(
    options=balance_type_options,
    value='ASSETS',
    description='Balance Type:',
    **common_widget_kwargs
)

financial_info_type_options = [('Current', "INSTANT"), ('Accumulated', 'DURATION')]

financial_info_type_sel = widgets.Dropdown(
    options=financial_info_type_options,
    value='INSTANT',
    description='Financial Information Type:',
    **common_widget_kwargs
)

period_sel = widgets.DatePicker(
    description='Fiscal period',
    value=parse_date("2013-06-30"),
    **common_widget_kwargs
)

# interactive_output (rather than interact) only wires the controls to
# show_accounts; the controls themselves are laid out by hand below.
show_accounts_controls = {
    'company_ccvm': company_ccvm_sel,
    'balance_type': balance_type_sel,
    'financial_info_type': financial_info_type_sel,
    'period': period_sel,
}
out = widgets.interactive_output(show_accounts, show_accounts_controls)

# Two rows of controls followed by the output area.
top_row = widgets.HBox([company_ccvm_sel, period_sel])
bottom_row = widgets.HBox([balance_type_sel, financial_info_type_sel])

widgets.VBox([top_row, bottom_row, out])

VBox(children=(HBox(children=(Dropdown(description='Company CCVM:', options=(('3A COMPANHIA SECURITIZADORA', '…