In [1]:
import dataPreparationUtils as dpu
from scipy.stats import gamma, lognorm, pareto
from rpy2.robjects.packages import importr
import rpy2.robjects as ro
import pandas as pd
import numpy as np
import os
from rpy2.robjects import numpy2ri

In [2]:
import numpy as np
import pandas as pd
import itertools
import os
import readDataV2
import latexCodeGenV2

In [3]:
# Notebook-wide configuration.
# Directory holding the input CSV files; joined with a file name when tb5 is read.
FILE_DIR = 'small'

# Experiment settings. None of these are referenced by the cells visible in this
# notebook — presumably they mirror the settings of the run that produced the
# CSVs (TODO confirm).
nExperimentReptition = 10
randomSeed = 20220222
trueValue = 0.005  # NOTE(review): looks like the target tail probability — confirm
dataDistributions = [
    'gamma',
    'lognorm',
    'pareto',
]
dataSizes = [
    500,
]

## 1. Read data

- Tail probability estimation with single threshold
- Tail probability estimation with multiple thresholds
- Quantile estimation with single threshold
- Quantile estimation with multiple thresholds

In [3]:
# Loaders for tables 1-4 are currently disabled.
# NOTE(review): the Table 1-4 cells below reference tb1, tb2, tb31, tb32, tb33,
# tb34 and tb4, which are only assigned in these commented-out lines; on a fresh
# kernel those cells raise NameError until the loaders are re-enabled.
# tb1 = readDataV2.tableOne().groupby(by=['dataDistribution','dataSize','percentageLHS','thresholdPercentage'])
# tb2 = readDataV2.tableTwo().groupby(by=['dataDistribution','dataSize','quantitleValue','thresholdPercentage'])
# tb31 = readDataV2.tableThreeOne().groupby(by=['dataDistribution','dataSize','percentageLHS','thresholdPercentage'])
# tb32 = readDataV2.tableThreeTwo().groupby(by=['dataDistribution','dataSize','percentageLHS','thresholdPercentage'])
# tb33 = readDataV2.tableThreeThree().groupby(by=['dataDistribution','dataSize','quantitleValue','thresholdPercentage'])
# tb34 = readDataV2.tableThreeFour().groupby(by=['dataDistribution','dataSize','quantitleValue','thresholdPercentage'])
# tb4 = readDataV2.tableFour().groupby(by=['dataDistribution','dataSize','percentageLHS','thresholdPercentage'])

# Table 5 input: read the CSV from FILE_DIR and group it by data source, sample
# size and percentageLHS.
# NOTE(review): this file is named "bayesian" but the resulting tb5 is what the
# Peak-over-threshold and profile-likelihood cells pass to getTableFive — confirm
# this is the intended input for those tables.
tb5 = (
    pd.read_csv(os.path.join(FILE_DIR, 'tableFive_bayesian.csv'))
    .groupby(by=['Data Source', 'nData', 'percentageLHS'])
)


## LaTeX Code Generation

## Table 1

In [5]:
# Table 1: render the LaTeX source with latexCodeGenV2.getTableOne and save it
# to ./robustTailTable/table1.tex.
# NOTE(review): `tb1` is only assigned in a commented-out line of the "Read data"
# cell, so this cell raises NameError on a fresh kernel — confirm how tb1 is loaded.
dataSources = ["gamma", "lognorm", "pareto"]
nDatas = [500]
percentageLHS = 0.99
thresholdPercentage = 0.70
targetColumns = ['(0,CHI2)', '(1,CHI2)', '(2,CHI2)', '(0,KS)', '(1,KS)', '(2,KS)']

# Render first: if generation fails, an existing table1.tex is not truncated.
# The two strings are presumably the caption and the LaTeX label, and the final
# number a layout option — confirm against latexCodeGenV2.getTableOne.
tableLatex = latexCodeGenV2.getTableOne(
    tb1, dataSources, nDatas, percentageLHS, thresholdPercentage, targetColumns,
    "Tail probability estimation under different constraint settings. ",
    "tb1_tpe", 0.8,
).strip()

# The context manager closes the file even if the write raises.
with open("./robustTailTable/table1.tex", "w") as text_file:
    text_file.write(tableLatex)

## Table 2

In [4]:
# Table 2: render the LaTeX source with latexCodeGenV2.getTableTwo and save it
# to ./robustTailTable/table2.tex.
# NOTE(review): `tb2` is only assigned in a commented-out line of the "Read data"
# cell, so this cell raises NameError on a fresh kernel — confirm how tb2 is loaded.
dataSources = ["gamma", "lognorm", "pareto"]
nDatas = [500]
percentageLHS = 0.99
thresholdPercentage = 0.70
targetColumns = ['(0,CHI2)', '(1,CHI2)', '(2,CHI2)']

# Render first: if generation fails, an existing table2.tex is not truncated.
# The two strings are presumably the caption and the LaTeX label, and the final
# number a layout option — confirm against latexCodeGenV2.getTableTwo.
tableLatex = latexCodeGenV2.getTableTwo(
    tb2, dataSources, nDatas, percentageLHS, thresholdPercentage, targetColumns,
    "Quantile estimation under different constraint settings. ",
    "tb2_qe", 0.75,
).strip()

# The context manager closes the file even if the write raises.
with open("./robustTailTable/table2.tex", "w") as text_file:
    text_file.write(tableLatex)

## Table 3

In [8]:
# Table 3: render two LaTeX tables with latexCodeGenV2.getTableThree and save
# them to ./robustTailTable/table3_tpe.tex and ./robustTailTable/table3_qe.tex.
# NOTE(review): tb31, tb32, tb33 and tb34 are only assigned in commented-out
# lines of the "Read data" cell, so this cell raises NameError on a fresh kernel.
dataSources = ["gamma", "lognorm", "pareto"]
nDatas = [500]
percentageLHS = 0.99
thresholdPercentages = 0.6  # a single value despite the plural name
targetColumns = ['(2,CHI2)']

# Tail-probability variant. Render before opening the file so a failure cannot
# truncate an existing output. The caption typo "probablity" is corrected here.
tableLatex = latexCodeGenV2.getTableThree(
    tb31, tb32, dataSources, nDatas, percentageLHS, thresholdPercentages, targetColumns,
    "Tail probability estimation under different cutoff threshold(s).",
    "tb3_tpe", 0.75,
).strip()
# Each output gets its own context manager; previously the first handle was
# rebound without ever being closed.
with open("./robustTailTable/table3_tpe.tex", "w") as text_file:
    text_file.write(tableLatex)

# Quantile variant. The trailing True is passed only for this table; presumably
# it switches getTableThree to quantile mode — confirm in latexCodeGenV2.
tableLatex = latexCodeGenV2.getTableThree(
    tb33, tb34, dataSources, nDatas, percentageLHS, thresholdPercentages, targetColumns,
    "Quantile estimation under different cutoff threshold(s). ",
    "tb3_qe", 0.75, True,
).strip()
with open("./robustTailTable/table3_qe.tex", "w") as text_file:
    text_file.write(tableLatex)

## Table 4

In [9]:
# Table 4: render the LaTeX source with latexCodeGenV2.getTableFour and save it
# to ./robustTailTable/table4.tex.
# NOTE(review): `tb4` is only assigned in a commented-out line of the "Read data"
# cell, so this cell raises NameError on a fresh kernel — confirm how tb4 is loaded.
dataSources = ["gamma", "lognorm", "pareto"]
nDatas = [500]
# Ten evenly spaced levels from 0.9 to 0.99 inclusive.
# NOTE(review): linspace values are not guaranteed to be bit-identical to
# literals such as 0.91; if getTableFour looks groups up by exact float key,
# verify the keys match.
percentageLHSs = np.linspace(0.9, 0.99, 10).tolist()
thresholdPercentage = 0.7
targetColumns = ['(2,CHI2)', '(2,KS)']

# Render first: if generation fails, an existing table4.tex is not truncated.
# The label embeds the threshold percentage ("tb4_tpe_0.7").
tableLatex = latexCodeGenV2.getTableFour(
    tb4, dataSources, nDatas, percentageLHSs, thresholdPercentage, targetColumns,
    "Tail probability estimation under different objective functions. ",
    "tb4_tpe_{:}".format(thresholdPercentage), 0.6,
).strip()

# The context manager closes the file even if the write raises.
with open("./robustTailTable/table4.tex", "w") as text_file:
    text_file.write(tableLatex)

## Table 5

## Peak over threshold

In [8]:
# Table 5 (Peak-Over-Threshold): render the LaTeX source with
# latexCodeGenV2.getTableFive and save it to ./robustTailTable/table5.tex.
# NOTE(review): uses whichever `tb5` is currently bound in the kernel; in
# top-to-bottom order that is the frame read from 'tableFive_bayesian.csv' —
# confirm this is the intended input for the POT table.
dataSources = ["gamma", "lognorm", "pareto"]
nDatas = [500]
# Eleven evenly spaced levels from 0.9 to 0.95 inclusive.
percentageLHSs = np.linspace(0.9, 0.95, 11).tolist()
targetColumns = ['Upper Bound']

# Render first: if generation fails, an existing table5.tex is not truncated.
tableLatex = latexCodeGenV2.getTableFive(
    tb5, dataSources, nDatas, percentageLHSs, targetColumns,
    "Tail probability estimation using Peak-Over-Threshold.",
    "tb5_tpe_pot", 0.8,
).strip()

# The context manager closes the file even if the write raises.
with open("./robustTailTable/table5.tex", "w") as text_file:
    text_file.write(tableLatex)

## Profile likelihood method

In [None]:
# Table 5 (profile likelihood): render the LaTeX source with
# latexCodeGenV2.getTableFive and save it to ./robustTailTable/table5_w_pl.tex.
# NOTE(review): no profile-likelihood data is loaded before this cell, so it
# uses whichever `tb5` is currently bound in the kernel — confirm the intended input.
dataSources = ["gamma", "lognorm", "pareto"]
nDatas = [500]
percentageLHSs = [0.9, 0.95, 0.99]
targetColumns = ['Upper Bound']

# Render first: if generation fails, an existing table5_w_pl.tex is not truncated.
tableLatex = latexCodeGenV2.getTableFive(
    tb5, dataSources, nDatas, percentageLHSs, targetColumns,
    "Tail probability estimation using profile likelihood.",
    "tb5_tpe_profile_likelihood", 0.8,
).strip()

# The context manager closes the file even if the write raises.
with open("./robustTailTable/table5_w_pl.tex", "w") as text_file:
    text_file.write(tableLatex)


## Bayesian method

In [11]:
# Load the Bayesian results: one CSV per (percentageLHS, dataSource) pair under
# file_dir, stacked row-wise and grouped for getTableFive. Rebinds `tb5`.
file_dir = 'large_table5_bayesian'

# Read all files, then concatenate once. Growing a frame with concat inside a
# loop re-copies the accumulated rows on every iteration. The comprehension also
# keeps its loop variables local, so the notebook-level `percentageLHS` is no
# longer overwritten with a string.
df = pd.concat(
    [
        pd.read_csv(f'{file_dir}/table5_{percentageLHS}_{dataSource}.csv')
        for percentageLHS in ["0.9", "0.95", "0.99"]
        for dataSource in ["gamma", "lognorm", "pareto"]
    ],
    axis=0,
)
tb5 = df.groupby(by=['Data Source', 'nData', 'percentageLHS'])

In [12]:
import numpy as np
# Table 5 (Bayesian): render the LaTeX source with latexCodeGenV2.getTableFive
# and save it to ./robustTailTable/table5_w_bayesian.tex.
# Uses whichever `tb5` is currently bound in the kernel.
dataSources = ["gamma", "lognorm", "pareto"]
nDatas = [500]
percentageLHSs = [0.9, 0.95, 0.99]
targetColumns = ['Upper Bound']

# Render first: if generation fails, an existing output file is not truncated.
tableLatex = latexCodeGenV2.getTableFive(
    tb5, dataSources, nDatas, percentageLHSs, targetColumns,
    "Tail probability estimation using Bayesian.",
    "tb5_tpe_bayesian", 0.8,
).strip()

# The context manager closes the file even if the write raises.
with open("./robustTailTable/table5_w_bayesian.tex", "w") as text_file:
    text_file.write(tableLatex)

## Probability-weighted moment methods

In [7]:
import pandas as pd

In [8]:
# Load the probability-weighted-moment results: one CSV per
# (percentageLHS, dataSource) pair under file_dir, stacked row-wise and grouped
# for getTableFive. Rebinds `tb5`.
file_dir = 'large_table5_pwm'

# Read all files, then concatenate once. Growing a frame with concat inside a
# loop re-copies the accumulated rows on every iteration. The comprehension also
# keeps its loop variables local, so the notebook-level `percentageLHS` is no
# longer overwritten with a string.
df = pd.concat(
    [
        pd.read_csv(f'{file_dir}/table5_{percentageLHS}_{dataSource}.csv')
        for percentageLHS in ["0.9", "0.95", "0.99"]
        for dataSource in ["gamma", "lognorm", "pareto"]
    ],
    axis=0,
)
tb5 = df.groupby(by=['Data Source', 'nData', 'percentageLHS'])

In [10]:
import numpy as np
# Table 5 (probability-weighted moment): render the LaTeX source with
# latexCodeGenV2.getTableFive and save it to ./robustTailTable/table5_w_pwm.tex.
# Uses whichever `tb5` is currently bound in the kernel.
dataSources = ["gamma", "lognorm", "pareto"]
nDatas = [500]
percentageLHSs = [0.9, 0.95, 0.99]
targetColumns = ['Upper Bound']

# Render first: if generation fails, an existing output file is not truncated.
tableLatex = latexCodeGenV2.getTableFive(
    tb5, dataSources, nDatas, percentageLHSs, targetColumns,
    "Tail probability estimation using probability-weighted moment.",
    "tb5_tpe_pwm", 0.8,
).strip()

# The context manager closes the file even if the write raises.
with open("./robustTailTable/table5_w_pwm.tex", "w") as text_file:
    text_file.write(tableLatex)