# CLICK ME !!! Mount shared folder

In [1]:
from google.colab import drive
import os

gdrive_path='/content/gdrive/MyDrive/Robust DevOps: Chiemsee/sorted-results'

# This will mount your google drive under 'MyDrive'
drive.mount('/content/gdrive', force_remount=True)


Mounted at /content/gdrive


# check if we are in the correct dir

In [2]:
subdirectories = [d for d in os.listdir(gdrive_path) if os.path.isdir(os.path.join(gdrive_path, d))]
subdirectories

['alpine',
 'aos',
 'bulma',
 'gatsby',
 'ionic',
 'react-beautiful-dnd',
 'react-hook-form',
 'uppy',
 'vuecli',
 'n8n',
 'mermaid',
 'FreeCodeCamp']

## Intermediate step

In [3]:
import os
import pandas as pd
import matplotlib as plt

THRESHOLD = 0

def get_all_csv_files(path):
    # This function will return a list of paths to all csv files in the given path and its subdirectories
    csv_files = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith('.csv'):
                csv_files.append(os.path.join(root, file))
    return csv_files

def process():
  # Get all csv files in the working directory and its subdirectories
  csv_files = get_all_csv_files(os.getcwd())
  print(csv_files)

  # Initialize a dataframe with the first CSV file
  # df = pd.read_csv(csv_files[0])

  # Find the "none_none.csv" file
  start_file = "none_none.csv"
  start_file_full_path = None

  for file in csv_files:
      if os.path.basename(file) == start_file:
          start_file_full_path = file
          csv_files.remove(file)
          break

  if start_file_full_path is None:
      raise FileNotFoundError(f"{start_file} not found in the list of csv files.")


  # Initialize a dataframe with the "none_none.csv" file
  df = pd.read_csv(start_file_full_path)

  # Loop over the rest of the CSV files and merge them with the main dataframe
  for file in csv_files:
      df_file = pd.read_csv(file)
      df = pd.merge(df, df_file, on='testcase_name', how='inner')

  df = df.fillna(0)
  #display(df)
  # remove testcase that behave the same as baseline
  mask = (df.drop(columns=['testcase_name', 'none_none']).sub(df['none_none'], axis=0).abs() > 1e-6).any(axis=1)
  #different_from_baseline = df[mask]
  # We mustn't filter because we need all test cases to count correctly and get the total_number of test cases later
  different_from_baseline = df
  #display(different_from_baseline)

  #compute positive and negative difference matix from baseline
  noise_conditions = different_from_baseline.columns.drop(['testcase_name'])
  # Create a copy of the dataframe to store the results
  df_standerlized = different_from_baseline[['testcase_name']].copy()

  # Calculate the difference for each noise condition
  for noise in noise_conditions:
      # if fail rate is lower than baseline
      #lower_than_base = different_from_baseline[noise] < different_from_baseline['none_none']
      #df_standerlized.loc[lower_than_base, noise] = different_from_baseline.loc[lower_than_base, noise] - different_from_baseline.loc[lower_than_base, 'none_none']

      # if fail rate is equal to baseline
      #equal_to_base = different_from_baseline[noise] == different_from_baseline['none_none']
      #df_standerlized.loc[equal_to_base, noise] = 0

      # if fail rate is higher than baseline
      #higher_than_base = different_from_baseline[noise] > different_from_baseline['none_none']
      #df_standerlized.loc[higher_than_base, noise] = different_from_baseline.loc[higher_than_base, noise] - different_from_baseline.loc[higher_than_base, 'none_none']

      # Just calculate the difference for every row, the calculation is the same for higher lower and same
      df_standerlized[noise] = different_from_baseline[noise] - different_from_baseline['none_none']



  df_standerlized = df_standerlized.round(2)

  # Calculate the total number of test cases
  total_test_cases = df_standerlized.shape[0]

  #display(df_standerlized)
  #print(f"Total test cases: {total_test_cases}")

  # Get the noise type columns (all columns except for "testcase_name" and "none_none")
  noise_types = [column for column in df_standerlized.columns if column not in ["testcase_name", "none_none"]]

  # Initialize a dictionary to store the results
  results = {noise_type: {} for noise_type in noise_types}

  # Calculate the counts and ratios for each noise type
  for noise_type in noise_types:
      for rate_type, condition in zip(["positive", "negative", "zero"], [df_standerlized[noise_type] > THRESHOLD, df_standerlized[noise_type] < -THRESHOLD, abs(df_standerlized[noise_type]) <= THRESHOLD]):
          count = df_standerlized[condition].shape[0]
          if total_test_cases != 0:
            ratio = (count / total_test_cases) * 100  # Convert to percentage
          else:
            ratio = float('nan')

          # Store the results
          results[noise_type][rate_type] = count
          results[noise_type]["ratio_" + rate_type] = ratio

  # Convert the results dictionary to a DataFrame for better visualization
  results_df = pd.DataFrame(results)
  results_df =results_df.round(2)
  transposed_df = results_df.transpose()

  # export result
  transposed_df.to_csv(os.getcwd()+".csv", index=True)

# CLICK ME!!! Iterate over each subdirectory

In [4]:
for subdirectory in subdirectories:
    # Change directory to the current subdirectory
    #if subdirectory != "bulma":
    #  continue
    os.chdir(os.path.join(gdrive_path, subdirectory))
    print(f"process project {subdirectory}")
    process()

process project alpine
['/content/gdrive/.shortcut-targets-by-id/16r1npLT-GjEiuoxB0LJqRJNvLv1UIep1/Robust DevOps: Chiemsee/sorted-results/alpine/cpu-high/cpu_high.csv', '/content/gdrive/.shortcut-targets-by-id/16r1npLT-GjEiuoxB0LJqRJNvLv1UIep1/Robust DevOps: Chiemsee/sorted-results/alpine/cpu-low/cpu_low.csv', '/content/gdrive/.shortcut-targets-by-id/16r1npLT-GjEiuoxB0LJqRJNvLv1UIep1/Robust DevOps: Chiemsee/sorted-results/alpine/io-low/io_low.csv', '/content/gdrive/.shortcut-targets-by-id/16r1npLT-GjEiuoxB0LJqRJNvLv1UIep1/Robust DevOps: Chiemsee/sorted-results/alpine/cpu-medium/cpu_medium.csv', '/content/gdrive/.shortcut-targets-by-id/16r1npLT-GjEiuoxB0LJqRJNvLv1UIep1/Robust DevOps: Chiemsee/sorted-results/alpine/io-high/io_high.csv', '/content/gdrive/.shortcut-targets-by-id/16r1npLT-GjEiuoxB0LJqRJNvLv1UIep1/Robust DevOps: Chiemsee/sorted-results/alpine/netdelay-high/netdelay_high.csv', '/content/gdrive/.shortcut-targets-by-id/16r1npLT-GjEiuoxB0LJqRJNvLv1UIep1/Robust DevOps: Chiemsee/s