In [16]:
import pandas as pd
import numpy as np
import glob, pathlib

def get_final_score(Y_pred, Y_real, verbose:bool=False):
  # ------------------------------------------ #
  if type(Y_pred) == pd.DataFrame:
    Y_pred = Y_pred.values
  if type(Y_real) == pd.DataFrame:
    Y_real = Y_real.values

  res_a = np.abs(Y_real-Y_pred)

  total = len(res_a)
  u_05 = np.sum(res_a <= 0.05)
  u_05_p = np.round(100 * (u_05 / total), 2)

  u_10 = np.sum(res_a <= 0.10) - u_05
  u_10_p = np.round(100 * (u_10 / total), 2)

  u_50 = np.sum(res_a <= 0.50) - u_05 - u_10
  u_50_p = np.round(100 * (u_50 / total), 2)

  high = np.sum(res_a > 0.50)
  high_p = np.round(100 * (high / total), 2)
  
  # ------------------------------------------ #
  bool_array = res_a > 0.1
  # Finden Sie die Indizes der Übergänge von False zu True
  transitions = np.where(np.diff(bool_array))[0] + 1
  # Teilen Sie das Array in zusammenhängende Blöcke
  blocks = np.split(bool_array, transitions)
  # Filtern Sie nur die Blöcke mit True-Werten
  true_blocks = [block for block in blocks if block[0]]
  
  single_blocks=0
  short_blocks=0
  long_blocks=0

  for block in true_blocks:
    if len(block) < 2:
      single_blocks+=1
    elif len(block) >= 2 and len(block) < 10:
      short_blocks+=1
    else:
      long_blocks+=1

  # general_block_score = np.round(100*(1.0 - (len(true_blocks) / (total/2))), 2)
  general_block_score = u_05_p
  single_block_score = np.round(100*((single_blocks / (total))), 2)
  short_block_score = np.round(100*((short_blocks / (total))), 2)
  # ------------------------------------------ #
  total_error_score = u_05_p + (u_10_p/2) + (u_50_p/4)
  total_block_score = general_block_score + (single_block_score/2) + (short_block_score/4)

  if verbose: 
    print(f'  0 - .05: {u_05_p}%')
    print(f'.05 - .1: {u_10_p}%')
    print(f'.10 - .5: {u_50_p}%')
    print(f'Higher .50: {high_p}% \n')
    print(f'Single outlierblocks: {single_blocks}')
    print(f'Short outlierblocks: {short_blocks}')
    print(f'Long outlierblocks: {long_blocks}')
    print(f'----- Scores -----')
    print(f'Error Scores:')
    print(f'0 - .05 deviation score: {np.round(u_05_p, 4)}')
    print(f'.05 - .1 deviation score: {np.round(u_10_p/2, 4)}')
    print(f'.1 - .5 deviation score: {np.round(u_50_p/4, 4)}')
    print(f'Total error score: {total_error_score}')
    print(f'Block Scores:')
    print(f'General score for less block: {general_block_score}')
    print(f'Single outliers score: {single_block_score/2}')
    print(f'Short outliers score: {short_block_score/4}')
    print(f'Total block score: {total_block_score}')
    print('----- ----- ----- ----- -----')
  
  return np.round(total_error_score + total_block_score, 0)

# def find_originals():

if __name__ =="__main__":
  eval_path='C://Users//jeickmeyer//Development//GIT//anomalieerkennung-am-temper//Hackathon2023//Submissions//DasKollektiv//'
  # eval_path='C://Users//jeickmeyer//Development//GIT//anomalieerkennung-am-temper//Hackathon2023//Data_Jury//'
  ref_path='C://Users//jeickmeyer//Development//GIT//anomalieerkennung-am-temper//Hackathon2023//Data_Jury//Y'

  references_list = []

  pred_score_list = []
  xg_score_list = []
  nn_score_list = []

  for file in glob.glob(eval_path+'**'):
    if '_pred' in file and '.pq' in file:
      print(file)
      _ = ref_path+'//'+str(pathlib.Path(file).name).replace('_pred', '')
      if 'data' in _:
        _ = _.replace('data', 'target')
      pred_y = pd.read_parquet(file)
      real_y = pd.read_parquet(_)
      # print(f'\nMatching\n\tPrediction:\t{file}\n\tOriginal:\t{_}')
      score = get_final_score(pred_y, real_y)
      pred_score_list.append(score)
    
    # if '_XG' in file:
    #   _ = ref_path+'//'+str(pathlib.Path(file).name).replace('_XG', '')
    #   if 'data' in _:
    #     _ = _.replace('data', 'target')
    #   pred_y = pd.read_parquet(file)
    #   real_y = pd.read_parquet(_)
    #   # print(f'\nMatching\n\tPrediction:\t{file}\n\tOriginal:\t{_}')
    #   score = get_final_score(pred_y, real_y)
    #   xg_score_list.append(score)
    
    # if '_NN' in file:
    #   _ = ref_path+'//'+str(pathlib.Path(file).name).replace('_NN', '')
    #   if 'data' in _:
    #     _ = _.replace('data', 'target')
    #   pred_y = pd.read_parquet(file)
    #   real_y = pd.read_parquet(_)
    #   # print(f'\nMatching\n\tPrediction:\t{file}\n\tOriginal:\t{_}')
    #   score = get_final_score(pred_y, real_y)
    #   nn_score_list.append(score)

print(f'\n{pred_score_list}\tTotal: {np.sum(pred_score_list)}/{len(pred_score_list)*200} ({round(100*(np.sum(pred_score_list)/(len(pred_score_list)*200)), 2)}%)')
# print(f'\n{xg_score_list}\tTotal: {np.sum(xg_score_list)}/{len(xg_score_list)*200} ({round(100*(np.sum(xg_score_list)/(len(xg_score_list)*200)), 2)}%)')
# print(f'\n{nn_score_list}\tTotal: {np.sum(nn_score_list)}/{len(nn_score_list)*200} ({round(100*(np.sum(nn_score_list)/(len(nn_score_list)*200)), 2)}%)')

C://Users//jeickmeyer//Development//GIT//anomalieerkennung-am-temper//Hackathon2023//Submissions//DasKollektiv\2023_03_17_data_pred.pq
C://Users//jeickmeyer//Development//GIT//anomalieerkennung-am-temper//Hackathon2023//Submissions//DasKollektiv\2023_03_24_data_pred.pq
C://Users//jeickmeyer//Development//GIT//anomalieerkennung-am-temper//Hackathon2023//Submissions//DasKollektiv\2023_03_31_data_pred.pq
C://Users//jeickmeyer//Development//GIT//anomalieerkennung-am-temper//Hackathon2023//Submissions//DasKollektiv\2023_04_14_data_pred.pq
C://Users//jeickmeyer//Development//GIT//anomalieerkennung-am-temper//Hackathon2023//Submissions//DasKollektiv\2023_04_18_data_pred.pq

[27.0, 63.0, 29.0, 1.0, 16.0]	Total: 136.0/1000 (13.6%)
