# Importing libraries

In [1]:
import pandas as pd
import numpy as np
import altair as alt

In [None]:
# Enable Altair's 'fivethirtyeight' (538) theme before any chart is built.
alt.themes.enable('fivethirtyeight')

# Visualisation functions

In [3]:
def ticker_plot(df, name = 'Index plot'):
    """Build a two-panel Altair chart of a ticker's daily prices and volume.

    The upper panel draws one thin bar per row spanning ``Open`` to ``Close``,
    coloured green (#006633) when ``Open <= Close`` and red (#FF0000)
    otherwise. The lower panel plots ``Volume`` and carries an interval brush
    along the x axis; the brushed range sets the date domain of the upper
    panel, so dragging on the volume strip zooms the price panel.

    Args:
        df: DataFrame with ``Date``, ``Open``, ``Close`` and ``Volume``
            columns. ``Date`` may hold anything ``pd.to_datetime`` accepts.
            The frame is left unmodified.
        name: Title placed on the combined chart.

    Returns:
        The vertically concatenated chart (price panel above volume panel).
    """
    # Parse the dates on a copy so the caller's frame keeps its own column.
    data = df.assign(Date=pd.to_datetime(df['Date']))

    direction_color = alt.condition(
        "datum.Open <= datum.Close",
        alt.value("#006633"),
        alt.value("#FF0000"),
    )

    # Horizontal-only brush; it lives on the volume panel and is read by the
    # price panel's x scale below.
    brush = alt.selection_interval(encodings=['x'])

    base = alt.Chart(data).encode(
        x=alt.X('Date:T', title="Year"),
        color=direction_color,
    ).properties(height=250, width=800)

    # Override the shared x encoding so the visible date range follows the
    # brush (the full range is shown while nothing is selected).
    price = base.mark_bar(size=2).encode(
        x=alt.X('Date:T', title="Year", scale=alt.Scale(domain=brush)),
        y=alt.Y('Open:Q'),
        y2=alt.Y2('Close:Q'),
    )

    volume = base.mark_bar(size=1).encode(y=alt.Y('Volume:Q'))
    # Altair 5 renamed ``add_selection`` to ``add_params``; prefer the new
    # name when it exists and fall back for older releases.
    attach_brush = getattr(volume, 'add_params', None) or volume.add_selection
    volume = attach_brush(brush).properties(width=800, height=50)

    return (price & volume).properties(title=name)

In [4]:
def daily_stock_data_availability(input_DF):
    """Chart per-year row counts and per-column data availability.

    The rows are grouped by the calendar year of ``Date``. For every year the
    function records how many rows exist and, for every column, what
    percentage of those rows hold a non-null value. Two charts are built and
    placed side by side:

    * a bar chart of the number of rows per year, and
    * a heatmap (with the percentage printed in each cell) of column
      availability per year, covering every column except ``Date``.

    Args:
        input_DF: DataFrame with a ``Date`` column that ``pd.to_datetime``
            can parse; all other columns are treated as features. The frame
            is left unmodified. Rows whose date is missing cannot be assigned
            to a year and are left out of the statistics.

    Returns:
        The horizontally concatenated Altair chart (bar chart | heatmap).
    """
    # Work on a derived frame so the caller's data gains no 'year' column and
    # keeps its original 'Date' values.
    dates = pd.to_datetime(input_DF['Date'])
    dated = dates.notna()
    frame = input_DF.loc[dated].assign(
        Date=dates[dated],
        year=dates[dated].dt.year,
    )

    # sort=False keeps the years in order of first appearance.
    data_availability = {}
    for year, year_df in frame.groupby('year', sort=False):
        row_count = len(year_df)
        stats = (year_df.count() / row_count * 100).to_dict()
        stats['total_rows'] = row_count
        data_availability[year] = stats
    # Rows: one per input column plus 'year' and 'total_rows'; columns: years.
    df_data_availability = pd.DataFrame(data_availability)

    # Select by label rather than by position so the result does not depend
    # on where 'Date' or 'year' sit among the input columns.
    total_rows_per_year = (
        df_data_availability.loc['total_rows'].to_frame().reset_index()
    )
    total_rows_per_year['index'] = pd.to_datetime(
        total_rows_per_year['index'], format='%Y'
    )
    visualization1 = alt.Chart(
        total_rows_per_year,
        title="Number of days stock data available for ticker",
    ).mark_bar(size=20, color='#96EE77').encode(
        x=alt.X('index:T', title='Year'),
        y=alt.Y('total_rows:Q', title='Days'),
        tooltip=[
            alt.Tooltip('total_rows', title='Stock day'),
            alt.Tooltip('index:T', format='%Y'),
        ],
    ).properties(height=350, width=350)

    # Keep only the feature rows: 'Date' and 'year' are the grouping inputs
    # and 'total_rows' is already shown in the bar chart.
    features_availability = (
        df_data_availability
        .drop(index=['Date', 'year', 'total_rows'])
        .reset_index()
        .melt(id_vars=['index'])
    )
    base = alt.Chart(
        features_availability, title="Feature availability per year"
    ).encode(
        x=alt.X('index:N', title="Feature"),
        y=alt.Y('variable:N', title="Year"),
    ).properties(height=350, width=1450)

    heatmap = base.mark_rect(stroke='white', strokeWidth=1).encode(
        color=alt.Color(
            'value:Q',
            title="Percentage",
            scale=alt.Scale(domain=[20, 100], scheme='viridis', reverse=True),
        )
    )

    # Cell labels switch between white and black at the 30 % mark.
    text = base.mark_text(baseline='middle', color='white', size=6).encode(
        text=alt.Text('value:Q', format='.1f'),
        color=alt.condition(
            alt.expr.datum['value'] > 30,
            alt.value('white'),
            alt.value('black'),
        ),
    )

    visualization2 = heatmap + text

    return visualization1 | visualization2

# ELGIEQUIP - ELGI Equipments Ltd

In [None]:
# Load the ELGIEQUIP.BO CSV from the raw-data folder (path is relative to the working directory).
ELGIEQUIP = pd.read_csv('../datasets/rawdata/rawdata_bse_500/ELGIEQUIP.BO.csv')

## Plotting ELGI Equipments Ltd

In [None]:
# Build the price/volume chart for this ticker, titled with its symbol and company name.
ticker_plot(ELGIEQUIP, name = 'ELGIEQUIP - ELGI Equipments Ltd')

In [None]:
# Chart how many rows each year has and what percentage of each column is populated.
daily_stock_data_availability(ELGIEQUIP)

# TV18BRDCST - TV18 Broadcast Ltd

In [None]:
# Load the TV18BRDCST.BO CSV from the raw-data folder (path is relative to the working directory).
TV18BRDCST = pd.read_csv('../datasets/rawdata/rawdata_bse_500/TV18BRDCST.BO.csv')

## Plotting TV18 Broadcast Ltd

In [None]:
# Build the price/volume chart for this ticker, titled with the company name.
ticker_plot(TV18BRDCST, name = 'TV18 Broadcast Ltd')

In [None]:
# Chart how many rows each year has and what percentage of each column is populated.
daily_stock_data_availability(TV18BRDCST)

# IPCALAB - IPCA Laboratories Ltd

In [None]:
# Load the IPCALAB.BO CSV from the raw-data folder (path is relative to the working directory).
IPCALAB = pd.read_csv('../datasets/rawdata/rawdata_bse_500/IPCALAB.BO.csv')

## Plotting IPCA Laboratories Ltd

In [None]:
# Build the price/volume chart for this ticker, titled with the company name.
ticker_plot(IPCALAB, name = 'IPCA Laboratories Ltd')

In [None]:
# Chart how many rows each year has and what percentage of each column is populated.
daily_stock_data_availability(IPCALAB)

# PGHL - Procter & Gamble Health Ltd

In [None]:
# Load the PGHL.BO CSV from the raw-data folder (path is relative to the working directory).
PGHL = pd.read_csv('../datasets/rawdata/rawdata_bse_500/PGHL.BO.csv')

## Plotting Procter & Gamble Health Ltd

In [None]:
# Build the price/volume chart for this ticker, titled with the company name.
ticker_plot(PGHL, name = 'Procter & Gamble Health Ltd')

In [None]:
# Chart how many rows each year has and what percentage of each column is populated.
daily_stock_data_availability(PGHL)

# EIHOTEL - EIH Ltd

In [None]:
# Load the EIHOTEL.BO CSV from the raw-data folder (path is relative to the working directory).
EIHOTEL = pd.read_csv('../datasets/rawdata/rawdata_bse_500/EIHOTEL.BO.csv')

## Plotting EIH Ltd

In [None]:
# Build the price/volume chart for this ticker, titled with the company name.
ticker_plot(EIHOTEL, name = 'EIH Ltd')

In [None]:
# Chart how many rows each year has and what percentage of each column is populated.
daily_stock_data_availability(EIHOTEL)