# vcubo
## 1.1 Descriptive analytics (General)

#### Import libraries

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats

import plotly.graph_objects as go
import plotly.express as px

from ipywidgets import widgets, Layout
import ipywidgets


#### Import data

In [92]:
# Source of the project database. Important: this must be the *raw* GitHub URL,
# not the HTML page that renders the file.
VCDB_URL = 'https://raw.githubusercontent.com/vcubo/beta_0.1/main/VCDB_210828v0.csv'
df = pd.read_csv(VCDB_URL)

#### Model and coefficients

In [82]:
# Underlying distribution of the deviation model (single-row frame).
df_distrib = pd.DataFrame(
    {'type': ['lognormal'], 'mu': [0.1], 'sigma': [0.3], 'shift': [-0.3]}
)

# Coefficients obtained by regression: one row, one column per model variable.
_coef_by_variable = {
    'COUNTRY': 0.25,
    'LOB': 0.05,
    'SITE': 0.4,
    'PSIZE': 0.3,
    'CSIZE': 0.2,
    'SOC': 0.6,
    'PROC': 0.35,
    'ENG': 0.5,
    'WEA': 0.6,
    'MGM': 0.8,
    'MIT_ef': 0.7,
}
df_coef = pd.DataFrame([_coef_by_variable])

# Display names of the variables, in the same order as the first ten
# coefficients above.
df_part_index = [
    'Country',
    'LOB',
    'Site',
    'Project Size',
    'Contractor',
    'Social',
    'Procurement',
    'Engineering',
    'Weather',
    'Management',
]

In [83]:
# Preliminary model: partial impacts of the uncertainty and risk variables.
def partials(df):
    """Return the mean partial impact of every model variable over ``df``.

    Uncertainty variables (COUNTRY, LOB, SITE, PSIZE, CSIZE) contribute
    ``coef * <KEY>_RMEAN``; risk variables (SOC, PROC, ENG, WEA, MGM)
    contribute ``coef * <KEY>_EMEAN * (<KEY> - <KEY>_MIT)``.  Coefficients are
    read from the first row of the module-level ``df_coef`` and the display
    labels from the module-level ``df_part_index``.

    Parameters
    ----------
    df : pandas.DataFrame
        Project rows holding the ``*_RMEAN``, ``*_EMEAN``, ``*_MIT`` and risk
        columns listed above.

    Returns
    -------
    pandas.DataFrame
        One row per variable with the columns ``Impact`` (float mean over the
        rows of ``df``; NaN when ``df`` is empty), ``Variable`` (label) and
        ``Factor`` (``'Uncertainty'`` or ``'Risk'``).
    """
    uncertainty_keys = ['COUNTRY', 'LOB', 'SITE', 'PSIZE', 'CSIZE']
    risk_keys = ['SOC', 'PROC', 'ENG', 'WEA', 'MGM']
    coef = df_coef.iloc[0]

    # Per-project partial impacts, in the same order as df_part_index.
    impacts = [coef[key] * df[key + '_RMEAN'] for key in uncertainty_keys]
    impacts += [
        coef[key] * df[key + '_EMEAN'] * (df[key] - df[key + '_MIT'])
        for key in risk_keys
    ]

    # Building the frame column-wise keeps 'Impact' numeric (the previous
    # list-of-lists + transpose construction produced object dtype).  The
    # plotly figure that used to be built here was never returned, so it is
    # no longer created.
    return pd.DataFrame({
        'Impact': [float(series.mean()) for series in impacts],
        'Variable': df_part_index,
        'Factor': ['Uncertainty'] * len(uncertainty_keys)
                  + ['Risk'] * len(risk_keys),
    })
#partials(df)
#[partials(df)['Impact'][i] for i in range(10)]

#### Statistics

In [84]:
#Number of decimals in statistical results.
decimals = 3
def df_stats(df):
    """Return the project count and the rounded median deviations of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Project rows with the columns ``DEV_RAN`` (uncertainty deviation),
        ``DEV_EVE`` (risk-event deviation) and ``DEV_TOT`` (total deviation).

    Returns
    -------
    list
        ``[number_of_projects, [median DEV_RAN, median DEV_EVE, median DEV_TOT]]``
        with every median rounded to ``decimals`` places.  Note that these are
        medians, not means.
    """
    #Number of projects.
    num_pro_tot = len(df)

    #Medians of the uncertainty, risk-event and total deviations.
    #The mean/duration figures that used to be computed here were never
    #returned and made the DUR_AC / DUR_BL columns mandatory for no benefit,
    #so they are no longer evaluated.
    DEV_median = [np.round(np.median(df[column]), decimals)
                  for column in ('DEV_RAN', 'DEV_EVE', 'DEV_TOT')]

    return [num_pro_tot, DEV_median]



#### Interactive widgets

In [85]:
#Options for filters: 'ALL' followed by the distinct values found in a column.
def _filter_options(column):
    return ['ALL'] + df[column].unique().tolist()

opt_country = _filter_options('COUNTRY')
opt_lob = _filter_options('LOB')
opt_site = _filter_options('SITE')
opt_prsize = _filter_options('PR_SIZE')
opt_csize = _filter_options('MC_SIZE')

#Widgets:
elements_width = '40%'
elements_height = '25px'


def _filter_dropdown(options, description, tooltip):
    #All filter dropdowns share the same size and start on 'ALL'.
    return widgets.Dropdown(
        options=options,
        value='ALL',
        description=description,
        disabled=False,
        layout=Layout(width=elements_width, height=elements_height),
        tooltip=tooltip,
    )


select_country = _filter_dropdown(opt_country, 'Country', "Filter by Country")
select_lob = _filter_dropdown(opt_lob, 'LOB', "Filter by Line of Business")
select_site = _filter_dropdown(opt_site, 'Site', "Filter by site conditions")
select_prsize = _filter_dropdown(opt_prsize, 'Project size', "Filter by Projects' size")
select_csize = _filter_dropdown(opt_csize, 'Contractor size', "Filter by Main Contractor's size")

#select_distrib = widgets.SelectMultiple(
#    options=opt_distrib,
#    value=['Total'],
#    rows=len(opt_distrib),
#    description='Distribution',
#    disabled=False
#)

#apply_filter = widgets.Button(
#    description='APPLY',
#    disabled=False,
#    button_style='', # 'success', 'info', 'warning', 'danger' or ''
#    tooltip='Apply selected filters',
#    icon='check'
#)

# Summary text for the whole data set.  df_stats returns
# [project count, [median DEV_RAN, median DEV_EVE, median DEV_TOT]], so the
# figure reported here is the *median* total deviation (a fraction, rendered
# as a percentage).  The statistics are computed once instead of twice.
_n_projects_all, _dev_medians_all = df_stats(df)

stat_selection = widgets.HTML(
    value = "<h4>Distribution's statistics</h4>"+
    "Quantity of projects considered = "+str(_n_projects_all)+".<br>"+
    "The median delay of the selected projects is "+ "{:.1%}".format(_dev_medians_all[2])+".<br>",
    placeholder = '',
    description = ''
)

#### Traces

In [86]:
# Bin width of the total-deviation histogram; the component histograms use
# half of it.
hist_xbins_size = 0.01

figh1 = go.Histogram(x=df['DEV_TOT'], opacity=0.7, name='Total deviation',xbins={"size": hist_xbins_size})
figh2 = go.Histogram(x=df['DEV_RAN'], opacity=0.5, name='Uncertainty',xbins={"size": hist_xbins_size/2})
figh3 = go.Histogram(x=df['DEV_EVE'], opacity=0.5, name='Risk events',xbins={"size": hist_xbins_size/2})

# Evaluate the partial-impact model once and reuse it for both axes
# (it was previously recomputed for x and for y).
_partials_all = partials(df)
partial_imp = go.Bar(x=_partials_all['Factor'], y=_partials_all['Impact'])



#### Figure widget and layout formatting

In [87]:
# Deviation histogram graphs
def _overlay_hist_layout(title_text):
    # Layout shared by the overlaid histograms: percent ticks on the x axis.
    return go.Layout(
        title=dict(text=title_text),
        barmode='overlay',
        paper_bgcolor='whitesmoke',
        bargap=0.01,
        xaxis=dict(tickformat=".1%"),
    )


# Total deviation of the current selection.
g_dev_hist1 = go.FigureWidget(
    data=[figh1],
    layout=_overlay_hist_layout('Deviation distribution'),
)

# Uncertainty and risk-event deviations, overlaid.
g_dev_hist2 = go.FigureWidget(
    data=[figh2, figh3],
    layout=_overlay_hist_layout("Uncertainty and risk's impact distribution"),
)

# Mean partial impacts of the current selection, stacked per factor.
_selection_bar_layout = go.Layout(
    title=dict(text="Uncertainty and risk's distribution <br>(selection)"),
    barmode='relative',
    boxmode='group',
    paper_bgcolor='whitesmoke',
    bargap=0.5,
    bargroupgap=0.01,
    yaxis=dict(tickformat=".1%"),
)
g_dev_bar1 = go.FigureWidget(data=[partial_imp], layout=_selection_bar_layout)
#g_dev_bar1

In [88]:
# Stacked bar chart of the mean partial impacts over the whole data set,
# one colour per variable.
dev_comp_bar = px.bar(partials(df), x='Factor', y='Impact', color='Variable')

# All layout settings are applied to the figure itself.  When FigureWidget is
# given an existing Figure it takes that figure's layout, so a separate
# `layout=` argument (as used before) is not applied.
dev_comp_bar.update_layout(
    title=dict(text="General uncertainty and risk's composition"),
    barmode='relative',
    boxmode='group',
    paper_bgcolor='whitesmoke',
    bargap=0.5,
    bargroupgap=0.01,
    yaxis=dict(tickformat=".1%"),
)

g_dev_bar2 = go.FigureWidget(dev_comp_bar)
#g_dev_bar2

#### Interaction functions

In [89]:
def _selection_mask(df, selections):
    """Return a boolean Series selecting the rows of ``df`` that match.

    ``selections`` maps a column name to the chosen value; the value 'ALL'
    places no restriction on that column.
    """
    mask = pd.Series(True, index=df.index, dtype=bool)
    for column, choice in selections.items():
        if choice != 'ALL':
            mask &= (df[column] == choice)
    return mask


def _filter_by_widgets(df):
    """Return the rows of ``df`` matching the current dropdown values."""
    selections = {
        'COUNTRY': select_country.value,
        'LOB': select_lob.value,
        'SITE': select_site.value,
        'PR_SIZE': select_prsize.value,
        'MC_SIZE': select_csize.value,
    }
    return df[_selection_mask(df, selections)]


def _update_histograms(temp_df):
    """Push the deviations of ``temp_df`` into the two histogram widgets."""
    with g_dev_hist1.batch_update():
        g_dev_hist1.data[0].x = temp_df['DEV_TOT']
    with g_dev_hist2.batch_update():
        g_dev_hist2.data[0].x = temp_df['DEV_RAN']
        g_dev_hist2.data[1].x = temp_df['DEV_EVE']


def response(change):
    """Observer callback: refresh the selection-dependent figures.

    Filters the module-level ``df`` by the current dropdown values, then
    updates ``g_dev_hist1``, ``g_dev_hist2`` and the bar heights of
    ``g_dev_bar1``.

    Parameters
    ----------
    change
        Change notification passed by the widget; not used.

    Returns
    -------
    pandas.DataFrame
        The filtered rows.
    """
    temp_df = _filter_by_widgets(df)
    # Computed before any widget is touched, so a failure in the model
    # leaves all figures unchanged.
    y1 = np.asarray(partials(temp_df)['Impact'])

    _update_histograms(temp_df)
    with g_dev_bar1.batch_update():
        g_dev_bar1.data[0].y = y1

    return temp_df


def selection_stats(change, df):
    """Refresh the histograms for the current selection and return its stats.

    Parameters
    ----------
    change
        Change notification passed by the widget; not used.
    df : pandas.DataFrame
        Project rows to filter by the current dropdown values.

    Returns
    -------
    list
        The result of ``df_stats`` on the filtered rows.
    """
    temp_df = _filter_by_widgets(df)
    _update_histograms(temp_df)
    return df_stats(temp_df)

#response(1)    

### Results

In [90]:
#Re-run `response` whenever the value of one of these widgets changes.
#NOTE(review): stat_selection is an HTML widget, not a filter; it is kept in
#the list only because the original code registered the callback on it too.
for _observed in (select_country, select_lob, select_site,
                  select_prsize, select_csize, stat_selection):
    _observed.observe(response, names='value')

In [91]:
#Dashboard layout, top to bottom: filter dropdowns, total-deviation histogram,
#uncertainty/risk histogram, then the two composition bar charts side by side.
container1 = widgets.VBox(
    children=[select_country, select_lob, select_site, select_prsize, select_csize]
)
container2 = widgets.VBox(children=[container1, g_dev_hist1])
container3 = widgets.HBox(children=[g_dev_bar2, g_dev_bar1])

container_main1 = widgets.VBox(children=[container2, g_dev_hist2])
container_main2 = widgets.VBox(children=[container_main1, container3])

#Last expression of the cell: displays the dashboard.
container_main2

VBox(children=(VBox(children=(VBox(children=(VBox(children=(Dropdown(description='Country', layout=Layout(heig…

## 1.2 Descriptive analytics ("My company")

In [93]:
#Compact layout: two rows of filters above the selection-dependent figures.
container = widgets.HBox(children=[select_country, select_lob])
container2 = widgets.HBox(children=[select_site, select_prsize, select_csize])
#container3 = widgets.HBox([response(1), g_dev_hist2])
#container4 = widgets.HBox([g_dev_hist2, g_dev_hist4])
#NOTE(review): the box below is built but not assigned to a name.
widgets.VBox(children=[container, container2, g_dev_hist1, g_dev_bar1])

# "My company" subset: fixed country, line of business and contractor size.
_c1_rows = (
    df['COUNTRY'].eq('Argentina')
    & df['LOB'].eq('O&G - Downstream')
    & df['MC_SIZE'].eq('Small')
)
df_c1 = df[_c1_rows]

# Histogram traces of the subset; the component traces use half the bin width.
_c1_half_bin = hist_xbins_size/2
figh_c1_1 = go.Histogram(
    x=df_c1['DEV_TOT'], name='Total deviation', opacity=0.7,
    xbins={"size": hist_xbins_size},
)
figh_c1_2 = go.Histogram(
    x=df_c1['DEV_RAN'], name='Uncertainty', opacity=0.5,
    xbins={"size": _c1_half_bin},
)
figh_c1_3 = go.Histogram(
    x=df_c1['DEV_EVE'], name='Risk events', opacity=0.5,
    xbins={"size": _c1_half_bin},
)

# Stacked bar chart of the mean partial impacts of the "my company" subset,
# one colour per variable.
dev_comp_c1_bar = px.bar(partials(df_c1), x='Factor', y='Impact', color='Variable')

# All layout settings are applied to the figure itself.  When FigureWidget is
# given an existing Figure it takes that figure's layout, so a separate
# `layout=` argument (as used before) is not applied.
dev_comp_c1_bar.update_layout(
    title=dict(text="General uncertainty and risk's composition"),
    height=130,
    barmode='relative',
    boxmode='group',
    paper_bgcolor='whitesmoke',
    bargap=0.5,
    bargroupgap=0.01,
    yaxis=dict(tickformat=".1%"),
)

g_dev_c1_bar = go.FigureWidget(dev_comp_c1_bar)

# Total deviation histogram of the subset.
_c1_hist1_layout = go.Layout(
    title=dict(text='Deviation distribution'),
    barmode='overlay',
    paper_bgcolor='whitesmoke',
    bargap=0.01,
    xaxis=dict(tickformat=".1%"),
)
g_dev_c1_hist1 = go.FigureWidget(data=[figh_c1_1], layout=_c1_hist1_layout)

# Uncertainty and risk-event histograms of the subset, overlaid; centred
# title and whole-percent ticks.
_c1_hist2_layout = go.Layout(
    title=dict(text="Uncertainty and risk's impact distribution", x=0.5),
    barmode='overlay',
    paper_bgcolor='whitesmoke',
    bargap=0.01,
    xaxis=dict(tickformat=".0%"),
)
g_dev_c1_hist2 = go.FigureWidget(data=[figh_c1_2, figh_c1_3], layout=_c1_hist2_layout)



In [94]:

# Subset dashboard: total histogram on top; component histogram and
# composition bar chart side by side underneath.
container21 = widgets.VBox(children=[g_dev_c1_hist1])
container22 = widgets.HBox(children=[g_dev_c1_hist2, g_dev_c1_bar])

container_main21 = widgets.VBox(children=[container21, container22])

# Last expression of the cell: displays the dashboard.
container_main21

VBox(children=(VBox(children=(FigureWidget({
    'data': [{'name': 'Total deviation',
              'opacity':…

## 2. Predictive analytics (on "my project")

#### 2.1 Project's parameters - Size, contractors, site conditions, etc.

In [95]:
# Selection and text widgets used to characterize the project under evaluation.
_prsize_ev_layout = Layout(width=elements_width, height=elements_height)
select_prsize_ev = widgets.Dropdown(
    description='Project size',
    options=opt_prsize,
    value='ALL',
    disabled=False,
    layout=_prsize_ev_layout,
    tooltip="Filter by Projects' size",
)

# Placeholder figures: median delay (as a fraction) and baseline duration.
median1 = 0.23
duration = 24

_myproject_html = (
    "<h4>Statistics</h4>"
    f"Quantity of projects considered = {2}.<br>"
    f"The distribution based on your project's parameters has a median delay of {median1*100}%.<br>"
    f"The P50 duration according to the analysis is of {duration*(1+median1)}.<br>"
)
stat_myproject_ev = widgets.HTML(
    value=_myproject_html,
    placeholder='',
    description='',
)

# TODO: include the remaining containers as needed.
container_21 = widgets.HBox(children=[select_prsize_ev])


#### 2.2 Historical delays distribution and statistics according to parameters

In [96]:
# Fit a lognormal to the subset's total deviation; fit() returns
# (shape, loc, scale), which is also the positional order pdf/cdf expect.
main_param_c1 = sp.stats.lognorm.fit(df_c1['DEV_TOT'])

# Evaluation grid on [0, 1] with one point per histogram bin width.
_grid_points = int(1/hist_xbins_size)
x = np.linspace(0, 1, _grid_points)

gen_pdf_c1 = sp.stats.lognorm.pdf(x, *main_param_c1)
gen_cdf_c1 = sp.stats.lognorm.cdf(x, *main_param_c1)

# Ratio of the number of projects to the summed pdf samples.
scale = len(df_c1) / gen_pdf_c1.sum()

# The curves are used as computed, without applying `scale`.
main_pdf_c1 = gen_pdf_c1
main_cdf_c1 = gen_cdf_c1

main_param_c1, scale


((0.003993674980380654, -22.04267743678075, 22.442541873266762),
 0.5151528235853426)

In [97]:
# Total-deviation histogram of the subset with the fitted lognormal pdf on top.
_hist4_layout = go.Layout(
    title=dict(text='Deviation distribution and Lognormal fit'),
    barmode='overlay',
    paper_bgcolor='whitesmoke',
    bargap=0.01,
    xaxis=dict(tickformat=".1%"),
)
g_dev_hist4 = go.FigureWidget(data=[figh_c1_1], layout=_hist4_layout)
g_dev_hist4.add_scatter(x=x, y=main_pdf_c1, name='Lognormal fit pdf')


FigureWidget({
    'data': [{'name': 'Total deviation',
              'opacity': 0.7,
              'type': 'h…

#### Probability and cumulative distribution functions

In [98]:
#Invisible all-zero histogram trace; defined here but not added to the figure.
granularity = 1000
hist_dumb = go.Histogram(
    x=np.zeros(len(df_c1['DEV_TOT'])),
    name='',
    opacity=0.0,
    xbins={"size": 1/granularity},
)

#Figure without initial traces (the original passed `[hist_dumb]*0`, i.e. an
#empty list); both axes use percent ticks.
_fit_layout = go.Layout(
    title=dict(text='Deviation distribution and Lognormal fit'),
    barmode='overlay',
    paper_bgcolor='whitesmoke',
    bargap=0.01,
    xaxis=dict(tickformat=".1%"),
    yaxis=dict(tickformat=".0%"),
)
g_dev_fit_c1 = go.FigureWidget(data=[], layout=_fit_layout)

#pdf rescaled so that its maximum is 100%, and the cdf, on the same grid.
_pdf_relative_to_peak = main_pdf_c1/np.max(main_pdf_c1)
g_dev_fit_c1.add_scatter(x=x, y=_pdf_relative_to_peak, name='Lognormal fit pdf<br>(100% = mode)')
g_dev_fit_c1.add_scatter(x=x, y=gen_cdf_c1, name='Lognormal fit cdf')

#container_22 = widgets.HBox([stat_myproject_ev])
container_23 = widgets.VBox(children=[g_dev_fit_c1])
container_23

VBox(children=(FigureWidget({
    'data': [{'name': 'Lognormal fit pdf<br>(100% = mode)',
              'type'…

## 3. Prescriptive analytics I (possible risks to mitigate and ranges to adjust for a required P50 / P80)

## 4. Prescriptive analytics II (monthly follow-up and mitigation measures revision)