# Masterthesis
## Create Plots for Machine Learning results

This script creates bar plots to visualize the machine learning results.

**Imports and Definitions**
- The necessary libraries are loaded here and important variables are defined

**Imports and settings for this script**
- Import libraries and set variables for this script

**Create plots of machine learning results**
- Result files are loaded to create bar plots

**Create confusion matrix**


## Imports and Definitions

In [None]:
# Import sklearn
import sklearn

# Import pandas
import pandas as pd

# Import numpy
import numpy as np

# To calculate amplitude and phase
import math

# Measure runtime of a Jupyter notebook code cell
from timeit import default_timer as timer

# Used to check if file exists
import os

# Used to check if directory exists
import pathlib

# Import Operation System Calls
import SubOperationSystem

# Pick the path separator from the operating system: backslash on Windows
# (os.name == 'nt'), forward slash on everything else.
Delimiter = '\\' if os.name == 'nt' else '/'
print("OS is Windows" if Delimiter == '\\' else "OS is Linux")

# Root directory of all datasets; every path below ends with the separator
PathDataset = f'Dataset{Delimiter}'

# Directory of the CSI dataset files
PathDatasetSub = f'{PathDataset}CsiFilesRah{Delimiter}'

# Directory of the converted files
PathConverted = f'{PathDataset}Converted{Delimiter}'

# Directory of the scenario files
PathScenario = f'{PathDataset}Scenario{Delimiter}'

# Directory of the machine learning result files
PathResult = f'{PathDataset}Result{Delimiter}'

# Directory where the created plots are saved
PathPlot = f'{PathDataset}Plot{Delimiter}'

# Directory of the configuration files (scenario, mapping, algorithm)
PathConfig = f'FilesConfig{Delimiter}'

# Scenario file (file with info about the ten scenarios)
FileScenario = 'FileScenario.csv'

# Mapping file (file with info about original and converted filenames)
FileMapping = 'FileMapping.csv'

# Imports and settings for this script

Before running, set the parameter "Pca" to True or False.

With "Pca = True" the plots are created from the PCA results; otherwise they are created from the results without PCA.


In [None]:
# Import libraries
import matplotlib.pyplot as plt

# Import warnings and set warn level to ignore
import warnings
warnings.filterwarnings("ignore")

####################################################################################

# Select whether the plots are created from the results with or without PCA
# Set Pca to True or False

Pca = False
# Pca = True

####################################################################################

# Set file for algorithm
FileAlgorithm = 'FileAlgorithm.csv'

# This cell extends PathResult in place. Remove a sub directory appended by a
# previous run first, so that re-running the cell (for example after toggling
# Pca) does not produce a path like '.../ResultWithoutPca/ResultWithPca/'.
for _SubDirectory in ('ResultWithoutPca', 'ResultWithPca'):
    _Suffix = _SubDirectory + Delimiter
    if PathResult.endswith(_Suffix):
        PathResult = PathResult[:-len(_Suffix)]

# Set path where the result files are saved
if not Pca:
    print('Result without PCA')
    PathResult = PathResult + 'ResultWithoutPca' + Delimiter

else:
    print('Result with PCA')
    PathResult = PathResult + 'ResultWithPca' + Delimiter


# Suffix appended to the plot file name
Label = '' if not Pca else '_pca'
    

## Create plots of machine learning results

This script reads the result files, creates the plots and saves them as PNG files.

Following files are needed

* Scenario file - this file contains the scenarios
* Mapping file - with mapping between the number and the name of the dataset
* Algorithm file - a list of the algorithms used
* (*).acc file - with the ML results
* The results are saved in 'PathPlot'

In [None]:
# Create one grouped bar plot per scenario: for every algorithm (group on the
# x-axis) one bar per movement of the scenario, showing the f1-score read from
# the result file '<Scenario>_<Algorithm>.csv'. Each plot is saved to PathPlot.

# Read the scenario file (columns 'Scenario' and 'Datasets' are used)
dfScenario = pd.read_csv(PathConfig + FileScenario)

# Read the mapping file once; it contains the file name for every dataset number
dfFileMapping = pd.read_csv(PathConfig + FileMapping, names=['LineNumber','FilenameOld','FilenameNew'], skiprows=1)

# Read the algorithm file once to get the file-name part and display name of each algorithm
dfAlgorithm = pd.read_csv(PathConfig + FileAlgorithm, names=['LineNumber','Algorithm','DisplayNameAlgorithm'], skiprows=1)

# Display names of the algorithms (x tick labels)
listAlgorithmDisplayName = dfAlgorithm['DisplayNameAlgorithm'].tolist()

# x positions of the algorithm groups and of the tick labels
CounterAlgorithm = np.arange(len(dfAlgorithm))
arangeLADN = np.arange(len(listAlgorithmDisplayName))

# Summary rows of the result files that are not plotted
RowsToDrop = ['accuracy', 'macro avg', 'weighted avg']

# Loop through the scenarios
for indexScenario in dfScenario.index:

    # Get scenario name and the whitespace separated dataset numbers
    Scenario, Datasets = (dfScenario['Scenario'][indexScenario], dfScenario['Datasets'][indexScenario])
    DatasetItems = Datasets.split()
    DatasetNumbers = [int(DatasetItem) for DatasetItem in DatasetItems]

    # Build the legend entries: file name of each dataset number without
    # the '.csv' extension and with underscores replaced by spaces
    listMovementDisplayName = []
    for DatasetNumber in DatasetNumbers:
        MatchingNames = dfFileMapping.loc[dfFileMapping['LineNumber'] == DatasetNumber, 'FilenameNew']
        listMovementDisplayName.extend(
            FilenameNew.replace(".csv", "").replace("_", " ") for FilenameNew in MatchingNames
        )

    # One list of f1-scores per algorithm, ordered like DatasetNumbers
    DataToPlot = []

    # Loop through the algorithms
    for indexAlgorithm in dfAlgorithm.index:

        # Read the result file of this scenario and algorithm
        dfResults = pd.read_csv(PathResult + Scenario + "_" + dfAlgorithm['Algorithm'][indexAlgorithm] + ".csv")

        # The first column holds the row labels; give it a fixed name
        dfResults.rename(columns={dfResults.columns[0]: "algorithm"}, inplace=True)

        # Drop unwanted columns and the summary rows
        dfResults = dfResults.drop(columns=['precision', 'recall', 'support'])
        dfResults = dfResults[~dfResults['algorithm'].isin(RowsToDrop)]

        # Remaining row labels are compared as integers with the dataset numbers
        ResultF1Scores = dfResults['f1-score'].tolist()
        ResultNumbers = [int(ResultLabel) for ResultLabel in dfResults['algorithm']] if DatasetNumbers else []

        # Collect the f1-scores in the order of the scenario's dataset numbers
        listScenario = []
        for DatasetNumber in DatasetNumbers:
            listScenario.extend(
                F1Score
                for ResultNumber, F1Score in zip(ResultNumbers, ResultF1Scores)
                if ResultNumber == DatasetNumber
            )

        # Add to scenario list
        DataToPlot.append(listScenario)

    # Transpose: afterwards one list per movement with one value per algorithm
    DataToPlot = [list(Values) for Values in zip(*DataToPlot)]

    # Create figure
    fig, ax = plt.subplots()

    # NOTE(review): this limit is applied to the axes created by plt.subplots().
    # The bars are drawn on the axes added in the next statement, which covers
    # the whole figure, so the limit does not affect the visible plot. Kept
    # unchanged because the value range of the result files is not visible here.
    plt.ylim(0, 10)

    # Add the axes that fills the whole figure and is used for plotting
    ax = fig.add_axes([0,0,1,1])

    # Plots with more than 14 bars per group need thinner, closer bars
    if len(DataToPlot) < 15:
        widthBar = 0.1
        spaceBar = 10
    else:
        widthBar = 0.05
        spaceBar = 20

    # Draw one bar series per movement, shifted inside each algorithm group
    for bar, Values in enumerate(DataToPlot):
        ax.bar(CounterAlgorithm + (bar/spaceBar), Values, width = widthBar)

    # Offset of the tick labels depends on the number of bars per group
    countLADN = len(DataToPlot)
    xticksWidth = {3: 0.1, 5: 0.2}.get(countLADN, 0.35)

    # Settings for xticks
    ax.set_xticks(arangeLADN + xticksWidth, listAlgorithmDisplayName, rotation=90)

    # Set legend
    ax.legend(listMovementDisplayName, bbox_to_anchor=(1.05, 1), loc='upper left' )

    # Set scenario name (with or without PCA)
    PlotScenarioName = Scenario if not Pca else Scenario + ' - With PCA'

    # Build the title: 'Scenario' -> 'Szenario', every '0' becomes a space,
    # but '10' is protected by the temporary placeholder 'xx'
    PlotScenarioName = PlotScenarioName.replace("Scenario","Szenario")
    PlotScenarioName = PlotScenarioName.replace("10","xx")
    PlotScenarioName = PlotScenarioName.replace("0"," ")
    PlotScenarioName = PlotScenarioName.replace("xx"," 10")

    # Set title, x and y label
    ax.set(title=PlotScenarioName, ylabel='F1-Score in %', xlabel='Algorithmen')

    # Save plot
    plt.savefig(PathPlot + Scenario + Label, dpi=300, bbox_inches = 'tight')

    # Show plot
    plt.show()

    # Release the figure so figures do not pile up over the scenario loop
    plt.close(fig)


# Create Plot for SHAP Summary

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Hard-coded data of the summary: subcarrier labels (as strings) for the
# x-axis and the value plotted for each of them on the y-axis
X = np.array("6 58 7 43 44 45 34 49 46 33 38 39 13 53".split())
y = np.array([11, 10, 5] + [4] * 3 + [3] * 8)

# Draw the bar chart on a new 10 x 6 inch figure
plt.figure(figsize=(10, 6))
plt.bar(X, y)

# Axis labels and title
plt.gca().set(
    xlabel='Bezeichung der Subträger',
    ylabel='Anzahl der Subträger',
    title='Übersicht der Subträger mit den höchsten Score',
)

# Save the plot to PathPlot and display it
plt.savefig(PathPlot + 'SummaryPlotSubcarrierShap.png')
plt.show()

# Create confusion matrix

# Imports and Settings for this script

In [None]:

# Import for confusion matrix
from sklearn.metrics import confusion_matrix

# Import matplotlib for plots
import matplotlib.pyplot as plt

# Import seaborn
import seaborn as sns

# Name of the algorithm file (read from PathConfig); lists the algorithms
# whose confusion matrices are plotted
FileAlgorithm = 'FileAlgorithm.csv'

# Suffix appended to the file name of every confusion matrix plot
FileExtension = '_cm'

In [None]:
# Create one confusion matrix heatmap per scenario and algorithm from the
# file '<Scenario>_<Algorithm>.cm' in PathResult and save it to PathPlot.

# Read the scenario file (columns 'Scenario' and 'Datasets' are used)
dfScenario = pd.read_csv(PathConfig + FileScenario)

# Read file mapping (column 'FilenameNew' is used)
dfFiles = pd.read_csv(PathConfig + FileMapping)

# Read algorithm file (column 'Algorithm' is used)
dfAlgorithm = pd.read_csv(PathConfig + FileAlgorithm)

# Loop through the scenarios
for indexScenario in dfScenario.index:

    # Get scenario name and the whitespace separated dataset numbers
    Scenario,Datasets = (dfScenario['Scenario'][indexScenario],dfScenario['Datasets'][indexScenario])

    # Display names used as row and column labels of the matrix
    listColumns = []

    # Split into dataset items because an int value is needed below
    DatasetItems = Datasets.split()

    # Loop through the dataset numbers of the scenario
    for DatasetItem in DatasetItems:

        # File name of the dataset from column 'FilenameNew'
        # (-1 because the index begins at 0; assumes row N-1 of the mapping
        # file belongs to dataset number N -- TODO confirm)
        DatasetFilenames = dfFiles.loc[int(DatasetItem)-1]['FilenameNew']

        # Clean up: drop the extension and replace underscores by spaces
        DatasetFilenames = DatasetFilenames.replace(".csv","").replace("_"," ")

        listColumns.append(DatasetFilenames)

    # Loop through the algorithms
    for indexAlgorithmus in dfAlgorithm.index:

        # Get name of algorithm
        Algorithm = dfAlgorithm['Algorithm'][indexAlgorithmus]

        # Read the confusion matrix file; its first column is the row index
        dfCm = pd.read_csv(PathResult + Scenario + '_' + Algorithm + '.cm', index_col=0)

        # Label columns and rows with the display names
        # (raises if the matrix size differs from the number of datasets)
        dfCm = dfCm.set_axis(listColumns, axis=1)
        dfCm = dfCm.set_axis(listColumns, axis=0)

        # NOTE(review): the original code also computed the transpose of dfCm
        # but never used it; the matrix is plotted exactly as stored in the
        # file. Please confirm that rows/columns match the axis labels below.

        # Count of columns
        countColumns = dfCm.shape[1]

        # Scale the figure with the size of the matrix
        fig = plt.figure(figsize=(countColumns, countColumns))

        # Plot confusion matrix, cmap = color, fmt = format of values
        sns.heatmap(dfCm, annot = True, cmap = "YlGnBu", fmt =' .2f')

        # Set title name
        plt.title('Konfusionsmatrix - ' + Scenario + ' - ' + Algorithm, y=1.1)

        # Set ylabel
        plt.ylabel('Prognostizierte Klasse')

        # Set xlabel
        plt.xlabel('Tatsächliche Klasse')

        # Rotate xticks
        plt.xticks(rotation=90)

        # Rotate yticks
        plt.yticks(rotation=0)

        # Set tight layout
        plt.tight_layout()

        # Save plot
        plt.savefig(PathPlot + Scenario + '_' + Algorithm + FileExtension + '.png', bbox_inches = "tight")

        # Show plot
        plt.show()

        # Release the figure; one figure is created per scenario and algorithm
        plt.close(fig)