In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import pytest
import unittest
from pandas.testing import assert_frame_equal

from mcda.mcda_run import main
from mcda.utils import *
from mcda.configuration.config import Config
from mcda.utility_functions.normalization import Normalization



In [2]:
input_matrix = read_matrix("tests/resources/input_matrix_without_uncert.csv")

In [3]:
input_matrix

Unnamed: 0,alternatives,ind1,ind2,ind3,ind4,ind5,ind6
0,alt1,15.2,8.2,0.04,0.02,24.5,6.2
1,alt2,12.4,8.7,0.05,0.02,24.5,4.8
2,alt3,1.6,2.0,0.11,0.14,14.0,0.6
3,alt4,39.7,14.0,0.01,0.02,26.5,4.41


In [4]:
imd = input_matrix.drop(input_matrix.columns[0],axis=1)

In [5]:
imd

Unnamed: 0,ind1,ind2,ind3,ind4,ind5,ind6
0,15.2,8.2,0.04,0.02,24.5,6.2
1,12.4,8.7,0.05,0.02,24.5,4.8
2,1.6,2.0,0.11,0.14,14.0,0.6
3,39.7,14.0,0.01,0.02,26.5,4.41


In [6]:
polarities = ("-","-","+","+","+","+")

In [7]:
# Positions of the indicators, grouped by their polarity sign.
indeces_plus = [pos for pos in range(len(polarities)) if polarities[pos] == "+"]
indeces_minus = [pos for pos in range(len(polarities)) if polarities[pos] == "-"]

In [8]:
print(type(indeces_plus))
print(indeces_minus)

<class 'list'>
[0, 1]


In [9]:
# from input matrix select columns that go into minmax and the ones that go into reversed minmax
plus_pol = imd.iloc[:, indeces_plus]
minus_pol = imd.iloc[:, indeces_minus]

In [10]:
plus_pol

Unnamed: 0,ind3,ind4,ind5,ind6
0,0.04,0.02,24.5,6.2
1,0.05,0.02,24.5,4.8
2,0.11,0.14,14.0,0.6
3,0.01,0.02,26.5,4.41


In [11]:
plus_pol.mean(axis=0)

ind3     0.0525
ind4     0.0500
ind5    22.3750
ind6     4.0025
dtype: float64

In [12]:
plus_pol-plus_pol.mean(axis=0)

Unnamed: 0,ind3,ind4,ind5,ind6
0,-0.0125,-0.03,2.125,2.1975
1,-0.0025,-0.03,2.125,0.7975
2,0.0575,0.09,-8.375,-3.4025
3,-0.0425,-0.03,4.125,0.4075


## rank

In [None]:
imd

In [None]:
-1*imd

In [None]:
norm = Normalization(imd, polarities)
indicators_scaled_rank = norm.rank()

In [None]:
indicators_scaled_rank

In [None]:
# if you save the file it might compromise the unit tests!
#indicators_scaled_rank.to_csv('tests/resources/normalization/res_rank.csv',sep=';',decimal='.') 

## standardized

In [None]:
indicators_scaled_standard_plus = (plus_pol - plus_pol.mean(axis=0))/plus_pol.std(axis=0)

In [None]:
indicators_scaled_standard_plus

In [None]:
norm = Normalization(imd,polarities)
indicators_scaled_stand = norm.standardized()

In [None]:
indicators_scaled_stand

In [None]:
# if you save the file it might compromise the unit tests!
#indicators_scaled_stand.to_csv('tests/resources/normalization/res_standardized.csv',sep=';',decimal='.') 

## minmax

#### identify positive and negative polarities

In [None]:
x_plus = plus_pol.values # returns a numpy array
x_minus = minus_pol.values
x_minus

In [None]:
x_minus_test = minus_pol.to_numpy()
x_minus_test

In [None]:
# Let the scaler work on a copy (its default). With copy=False it may rescale
# x_plus in place, and because x_plus comes from plus_pol.values that could
# silently overwrite the raw indicator values that other cells rely on.
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
x_scaled = min_max_scaler.fit_transform(x_plus)

In [None]:
x_scaled

In [None]:
indicators_scaled_minmax_plus = pd.DataFrame(x_scaled)

In [None]:
indicators_scaled_minmax_plus

In [None]:
def reversed_minmax_scaler(data):
    """
    Rescale the given data to the range [0, 1] on a reversed scale,
    where the smaller the raw value the better (i.e. the higher the score).

    The scaling is computed column-wise (along axis 0) as
    ``(max - x) / (max - min)``, so each column's minimum maps to 1 and
    its maximum maps to 0.

    Args:
    - data: A list or numpy array of numeric data to be scaled. For 2-D
      input every column is scaled independently.

    Returns:
    - scaled_data: The scaled data as a float numpy array with the same
      shape as the input. A column whose values are all equal has no
      spread and is mapped to 0 instead of producing NaN.
    """
    # Work on a float array so the arithmetic below is element-wise
    values = np.array(data, dtype=float)

    # Per-column extremes (axis=0), not the extremes of the whole matrix
    col_max = np.max(values, axis=0)
    col_min = np.min(values, axis=0)

    # Avoid dividing by zero for constant columns: use a span of 1 there,
    # which leaves the numerator (always 0 for such a column) unchanged
    span = col_max - col_min
    safe_span = np.where(span == 0, 1.0, span)

    # Reversed min-max: distance from the column maximum, relative to the span
    return (col_max - values) / safe_span


In [None]:
indicators_scaled_minmax_minus = reversed_minmax_scaler(x_minus)

In [None]:
(8.2-2)/(14-2)

In [None]:
indicators_scaled_minmax_minus = pd.DataFrame(indicators_scaled_minmax_minus)

In [None]:
indicators_scaled_minmax_minus

In [None]:
indicators_scaled_minmax = pd.DataFrame(index=range(4),columns=range(6))
indicators_scaled_minmax.shape

In [None]:
indicators_scaled_minmax_plus

In [None]:
# merge back the plus and minus together
# Pre-allocate one row per alternative: .iloc can only fill rows that already
# exist, so a frame created without an index would never receive the values.
indicators_scaled_minmax = pd.DataFrame(
    index=indicators_scaled_minmax_plus.index,
    columns=range(len(polarities)),
    dtype=float,
)
# Put every scaled column back at the position its indicator had originally.
for i, index_p in enumerate(indeces_plus):
    indicators_scaled_minmax.iloc[:, index_p] = indicators_scaled_minmax_plus.iloc[:, i]
for j, index_n in enumerate(indeces_minus):
    indicators_scaled_minmax.iloc[:, index_n] = indicators_scaled_minmax_minus.iloc[:, j]

In [None]:
indicators_scaled_minmax

In [None]:
imd

In [17]:
# test class
norm = Normalization(imd, polarities)
test_res_no0 = norm.minmax(feature_range=(0.1, 1))
test_res_no0

Unnamed: 0,0,1,2,3,4,5
0,0.67874,0.535,0.37,0.1,0.856,1.0
1,0.744882,0.4975,0.46,0.1,0.856,0.775
2,1.0,1.0,1.0,1.0,0.1,0.1
3,0.1,0.1,0.1,0.1,1.0,0.712321


In [18]:
# if you save the file it might compromise the unit tests!
#test_res_no0.to_csv('tests/resources/normalization/res_minmax_no0.csv',sep=';',decimal='.') 

## target

In [None]:
imd

In [None]:
norm=Normalization(imd, polarities)
res_target_01 = norm.target(feature_range=(0, 1))
res_target_no0 = norm.target(feature_range=(0.1, 1))

In [None]:
# if you save the file it might compromise the unit tests!
#res_target_01.to_csv('tests/resources/normalization/res_target_01.csv',sep=';',decimal='.') 
#res_target_no0.to_csv('tests/resources/normalization/res_target_no0.csv',sep=';',decimal='.') 

In [None]:
res_target_01

In [None]:
res_target_no0

In [None]:
# Hand-computed check for the third indicator: divide by the column maximum,
# then stretch the ratio into the interval [0.1, 1].
col = imd.iloc[:, 2]
col1 = col.div(max(col)).mul(1 - 0.1).add(0.1)

In [None]:
col1

## Assert that two DataFrames are equal

In [None]:
input_matrix = read_matrix("tests/resources/input_matrix_without_uncert.csv")
input_matrix = input_matrix.drop(input_matrix.columns[0],axis=1) 

In [None]:
expected_res = read_matrix('tests/resources/normalization/res_minmax_no0.csv')

In [None]:
# Use the same settings as the cell that builds test_res_no0 (the frame the
# res_minmax_no0.csv save line refers to): pass the polarities and the
# (0.1, 1) feature range, otherwise res cannot match expected_res.
norm = Normalization(input_matrix, polarities)
res = norm.minmax(feature_range=(0.1, 1))

In [None]:
print(type(input_matrix))
print(type(expected_res))
print(type(res))

In [None]:
res

In [None]:
res.columns = res.columns.astype('str')

In [None]:
res

In [None]:
expected_res

In [None]:
assert_frame_equal(res, expected_res, check_like=True)

In [None]:
isinstance(res, pd.DataFrame)

In [None]:
feature_range=(0, 1)

In [None]:
lista = (5,4,3,2)
sum(lista)

In [None]:
lista_corr = [val/sum(lista) for val in lista]

In [None]:
lista_corr

In [None]:
sum(lista_corr)

In [None]:
# round() does not accept a list, so round every normalized value individually
[round(val, 2) for val in lista_corr]

In [None]:
config = {
  "input_matrix_path": "tests/resources/input_matrix_without_uncert.csv",
  "marginal_distribution_for_each_indicator": ["exact", "exact", "exact", "exact", "exact", "exact"],
  "polarity_for_each_indicator": ["-","-","+","+","+","+"],
  "monte_carlo_runs": 0,
  "no_cores": 1,
  "weight_for_each_indicator" : [0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
  "output_file_path": "/path/to/output.csv"
}

In [None]:
type(config)

In [None]:
import copy
test_config = copy.deepcopy(config)

In [None]:
type(test_config)

In [None]:
import unittest
import pytest
#isinstance(config, dict)
isinstance(test_config, dict)

### Summing across the columns of each row

In [43]:
imd

Unnamed: 0,ind1,ind2,ind3,ind4,ind5,ind6
0,15.2,8.2,0.04,0.02,24.5,6.2
1,12.4,8.7,0.05,0.02,24.5,4.8
2,1.6,2.0,0.11,0.14,14.0,0.6
3,39.7,14.0,0.01,0.02,26.5,4.41


In [25]:
imd.sum(axis=1)

0    54.16
1    50.47
2    18.45
3    84.64
dtype: float64

In [26]:
15.2+8.2+0.04+0.02+24.5+6.2

54.16

In [44]:
weights=(1,0,1,0,1,10)

In [32]:
test = imd*weights

In [33]:
test.sum(axis=1)

0    101.74
1     84.95
2     21.71
3    110.31
dtype: float64

In [31]:
(imd*weights).sum(axis=1)

0    101.74
1     84.95
2     21.71
3    110.31
dtype: float64

In [47]:
(imd**weights).product(axis=1)

0    1.250220e+09
1    9.862117e+07
2    1.489887e-02
3    2.926997e+07
dtype: float64

In [46]:
15.2*1.0*0.04*1.0*24.5*8.392994e+07

1250220386.24

In [48]:
imd

Unnamed: 0,ind1,ind2,ind3,ind4,ind5,ind6
0,15.2,8.2,0.04,0.02,24.5,6.2
1,12.4,8.7,0.05,0.02,24.5,4.8
2,1.6,2.0,0.11,0.14,14.0,0.6
3,39.7,14.0,0.01,0.02,26.5,4.41


In [49]:
imd.iloc[0,0]=0

In [50]:
imd

Unnamed: 0,ind1,ind2,ind3,ind4,ind5,ind6
0,0.0,8.2,0.04,0.02,24.5,6.2
1,12.4,8.7,0.05,0.02,24.5,4.8
2,1.6,2.0,0.11,0.14,14.0,0.6
3,39.7,14.0,0.01,0.02,26.5,4.41


In [51]:
imd.values

array([[0.00e+00, 8.20e+00, 4.00e-02, 2.00e-02, 2.45e+01, 6.20e+00],
       [1.24e+01, 8.70e+00, 5.00e-02, 2.00e-02, 2.45e+01, 4.80e+00],
       [1.60e+00, 2.00e+00, 1.10e-01, 1.40e-01, 1.40e+01, 6.00e-01],
       [3.97e+01, 1.40e+01, 1.00e-02, 2.00e-02, 2.65e+01, 4.41e+00]])

In [66]:
# Element-wise zero check: iterating over imd.values yields whole rows, so
# any(x == 0 for x in imd.values) has no single truth value per item and raises.
(imd == 0).any().any()

True

In [60]:
for x in imd.values: print(x)

[0.00e+00 8.20e+00 4.00e-02 2.00e-02 2.45e+01 6.20e+00]
[1.24e+01 8.70e+00 5.00e-02 2.00e-02 2.45e+01 4.80e+00]
[ 1.6   2.    0.11  0.14 14.    0.6 ]
[3.97e+01 1.40e+01 1.00e-02 2.00e-02 2.65e+01 4.41e+00]


In [69]:
(imd == 100).any().any()

False

In [74]:
imd

Unnamed: 0,ind1,ind2,ind3,ind4,ind5,ind6
0,0.0,8.2,0.04,0.02,24.5,6.2
1,12.4,8.7,0.05,0.02,24.5,4.8
2,1.6,2.0,0.11,0.14,14.0,0.6
3,39.7,14.0,0.01,0.02,26.5,4.41


In [78]:
weights=(1,2,1,2,1,10)

In [82]:
1/0.05

20.0

In [79]:
weights/imd

Unnamed: 0,ind1,ind2,ind3,ind4,ind5,ind6
0,inf,0.243902,25.0,100.0,0.040816,1.612903
1,0.080645,0.229885,20.0,100.0,0.040816,2.083333
2,0.625,1.0,9.090909,14.285714,0.071429,16.666667
3,0.025189,0.142857,100.0,100.0,0.037736,2.267574


In [83]:
(weights/imd).sum(axis=1)

0           inf
1    122.434680
2     41.739719
3    202.473356
dtype: float64

In [75]:
# Harmonic-style aggregation: a constant divided by the row sums of weights/imd.
# A zero entry in imd makes weights/imd infinite for that row, which drives the
# row's score to 0.
# NOTE(review): the hard-coded 6 presumably stands for the number of indicator
# columns; a weighted harmonic mean would normally use sum(weights) as the
# numerator — confirm which is intended.
scores = 6/((weights/imd).sum(axis=1))

In [76]:
scores

0    0.000000
1    0.270212
2    0.226809
3    0.058634
dtype: float64

In [86]:
imd

Unnamed: 0,ind1,ind2,ind3,ind4,ind5,ind6
0,0.0,8.2,0.04,0.02,24.5,6.2
1,12.4,8.7,0.05,0.02,24.5,4.8
2,1.6,2.0,0.11,0.14,14.0,0.6
3,39.7,14.0,0.01,0.02,26.5,4.41


In [85]:
imd.min(axis=1)

0    0.00
1    0.02
2    0.11
3    0.01
dtype: float64