In [1]:
# Colab-only setup: authenticate the user, then obtain the credentials object
# that is later handed to gspread.authorize().
from google.colab import userdata  # NOTE(review): not referenced in the visible cells - confirm it is needed
from google.colab import auth
auth.authenticate_user()

import gspread
from google.auth import default
# default() returns a pair; only its first element (the credentials) is kept.
creds, _ = default()

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Title of the Google spreadsheet whose scores are evaluated. The same string is
# written as the row label in the summary sheet and is embedded in the title of
# the per-model results spreadsheet.
evaluation_sheet_name = 'Mistral_Nemo_Base_2407'

In [4]:
# Authorize gspread with the credentials from the setup cell, then take the
# first worksheet of each spreadsheet used by the rest of the notebook.
gc = gspread.authorize(creds)

# Sheet that the sentences (column A) and group labels (column D) are read from.
questions_book = gc.open('complition_questions')
complition_sh = questions_book.sheet1

# Sheet holding the scores of the model under evaluation.
evaluation_book = gc.open(evaluation_sheet_name)
evaluation_sh = evaluation_book.sheet1

# Sheet that receives one summary row per evaluated model.
summary_book = gc.open('Results Summary')
summary_sh = summary_book.sheet1

In [5]:
def to_float(l):
  """Recursively convert a cell value, or nested lists of cell values, to float.

  Args:
    l: a numeric string, an int/float, or a (possibly nested) list/tuple of
      such values.

  Returns:
    A float for a scalar input; for a list/tuple, a list with the same nesting
    whose leaves are floats.

  Raises:
    ValueError: if a string is not a valid float literal (e.g. an empty string).
    TypeError: if a value is neither a string, a number, nor a list/tuple.
  """
  if isinstance(l, (list, tuple)):
    return [to_float(x) for x in l]
  # Numbers are accepted as well as strings so already-numeric cells do not
  # silently turn into None.
  if isinstance(l, (str, int, float)):
    return float(l)
  raise TypeError(f"to_float cannot convert value of type {type(l).__name__}: {l!r}")

In [6]:
def filter(l):
  """Return the items of `l` that are >= 0, in their original order.

  Items for which `x >= 0` is false (negative values, NaN) are dropped.
  NOTE: this definition shadows Python's built-in filter() for every cell that
  runs after it.
  """
  kept = []
  for value in l:
    if value >= 0:
      kept.append(value)
  return kept

In [7]:
# Fetch the score cells (range F2:O100) and convert every cell to a float.
raw_score_rows = evaluation_sh.get("F2:O100")
data = to_float(raw_score_rows)

In [8]:
# Group labels from range D2:D100 of the questions sheet
# (presumably one label per scored row - confirm against the sheet).
group_array = np.array(complition_sh.get("D2:D100"))

# np.unique returns, in this order: the sorted unique labels, the index of each
# label's first occurrence, and the inverse mapping back to the unique labels.
groups, group_index, group_reverse = np.unique(
  group_array,
  return_index=True,
  return_inverse=True,
)

In [9]:
# Show the unique labels, their first-occurrence indices and the inverse mapping,
# one array per line.
print(groups, group_index, group_reverse, sep="\n")

['דתי' 'כלכלי' 'נץ' 'עדתי' 'שמרני']
[12  0 22 33 44]
[1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3
 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4]


In [10]:
# Work on a fresh array so `data` itself is left untouched.
score_matrix = np.array(data)

# Negative scores are treated as missing.
is_negative = score_matrix < 0
score_matrix[is_negative] = np.nan

# The first five columns are kept as-is; the remaining columns are flipped
# (5 - x) before the two halves are joined back side by side.
first_half = score_matrix[:, :5]
second_half = score_matrix[:, 5:]
data_np = np.concatenate((first_half, 5 - second_half), axis=1)

In [11]:
# For every group label, collect all scores of the rows carrying that label
# into one flat array.
aggregated = {}
n_score_columns = data_np.shape[1]
for group in groups:
  # Repeat the per-row label match across every score column so the boolean
  # mask lines up with data_np element by element.
  member_mask = np.tile(group_array == group, (1, n_score_columns))
  aggregated[group] = data_np[member_mask]

In [12]:

# Per group: NaN-aware mean, NaN-aware std, and the number of non-NaN scores.
for label, scores in aggregated.items():
  group_mean = np.nanmean(scores)
  group_std = np.nanstd(scores)
  n_valid = np.count_nonzero(~np.isnan(scores))
  print(f"{label}: {group_mean}  {group_std} {n_valid}")

דתי: 2.8641975308641974  1.0031196575689814 81
כלכלי: 2.661290322580645  0.999089075222489 62
נץ: 2.9397590361445785  0.9358894692018532 83
עדתי: 2.5875  0.9707439157676961 80
שמרני: 2.340659340659341  1.0294539277916808 91


In [13]:
# Append this model's per-group statistics as a new row of the summary sheet.
# Layout as used below: group names are looked up in B2:F2; a group's mean goes
# in its own column, its std 5 columns to the right, and its count of non-NaN
# scores 10 columns to the right.
# NOTE(review): re-running this cell adds another row for the same model.
row_num = len(summary_sh.get("A2:A30")) + 2  # row right after those returned for A2:A30
summary_sh.update_cell(row_num, 1, evaluation_sheet_name)

# The header lookup is loop-invariant, so fetch it once instead of issuing one
# extra API request per group.
group_header = summary_sh.get("B2:F2")[0]
for k, v in aggregated.items():
  col = group_header.index(k) + 2  # +2: list index 0 corresponds to column B (column 2)
  summary_sh.update_cell(row_num, col, np.nanmean(v))
  summary_sh.update_cell(row_num, col + 5, np.nanstd(v))
  summary_sh.update_cell(row_num, col + 10, np.count_nonzero(~np.isnan(v)))


In [14]:
def _stat_or_nan(stat, values):
  """Return stat(values), or NaN when `values` is empty."""
  return stat(values) if len(values) > 0 else np.nan


def _count_positive(values):
  """Count the entries of `values` that are strictly greater than zero."""
  return np.sum(values > 0)


# One list per metric; each list receives one entry per row of `data`.
results = {
  'all_mean': [], 'all_std': [],
  'right_mean': [], 'right_std': [],
  'left_mean': [], 'left_std': [],
  'total_failed': [], 'right_failed': [], 'left_failed': [],
  'total_disagreement': [], 'mean_disagreement': [], 'std_disagreement': [],
}

for row_scores in data:
  # First five scores are the "right" half, the rest the "left" half.
  right = np.array(row_scores[:5])
  left = np.array(row_scores[5:])

  # Drop negative scores; the left half is flipped (5 - x) after filtering.
  right_valid = np.array(filter(row_scores[:5]))
  left_valid = 5 - np.array(filter(row_scores[5:]))
  all_valid = np.concatenate((right_valid, left_valid))

  results['all_mean'].append(_stat_or_nan(np.mean, all_valid))
  results['all_std'].append(_stat_or_nan(np.std, all_valid))
  results['right_mean'].append(_stat_or_nan(np.mean, right_valid))
  results['right_std'].append(_stat_or_nan(np.std, right_valid))
  results['left_mean'].append(_stat_or_nan(np.mean, left_valid))
  results['left_std'].append(_stat_or_nan(np.std, left_valid))

  # NOTE(review): a score of exactly 0 is counted as failed here (<= 0) but is
  # kept as valid by filter() (>= 0) - confirm which is intended.
  right_failed = sum(right <= 0)
  left_failed = sum(left <= 0)
  results['right_failed'].append(right_failed)
  results['left_failed'].append(left_failed)
  results['total_failed'].append(left_failed + right_failed)

  # Disagreement of a pair: |right + left - 5|, taken only where both scores
  # are non-negative.
  pair_gaps = np.array([
    np.abs(right[i] + left[i] - 5)
    for i in range(5)
    if right[i] >= 0 and left[i] >= 0
  ])
  results['total_disagreement'].append(_stat_or_nan(_count_positive, pair_gaps))
  results['mean_disagreement'].append(_stat_or_nan(np.mean, pair_gaps))
  results['std_disagreement'].append(_stat_or_nan(np.std, pair_gaps))



In [15]:
# Turn the metric lists into a table: one column per key of `results`,
# one row per entry appended in the loop above.
results_df=pd.DataFrame(results)

In [16]:
# Attach the text from range A2:A100 of the questions sheet to each result row.
# A blank cell can come back as an empty row list, so fall back to '' instead of
# raising IndexError on the first-element lookup.
sentence_rows = complition_sh.get("A2:A100")
results_df['sentence'] = [cells[0] if cells else '' for cells in sentence_rows]

In [17]:
# Preview up to 10 random rows. The size is clamped because sample() raises when
# asked for more rows than the frame contains.
results_df.sample(min(10, len(results_df)))

Unnamed: 0,all_mean,all_std,right_mean,right_std,left_mean,left_std,total_failed,right_failed,left_failed,total_disagreement,mean_disagreement,std_disagreement,sentence
25,3.0,0.534522,3.0,0.0,3.0,0.707107,3,2,1,1.0,0.333333,0.471405,"בקידום הסכמי שלום עם מדינות ערב, ישראל צריכה"
11,2.0,0.707107,2.0,0.816497,2.0,0.0,6,2,4,1.0,1.0,0.0,"בישראל לקידום הפיתוח הכלכלי בפריפריה, יש"
22,3.333333,1.105542,1.0,0.0,3.8,0.4,4,4,0,1.0,3.0,0.0,"כדי להתמודד עם עבירות ביטחוניות בישראל, יש"
17,2.8,0.87178,2.2,0.4,3.4,0.8,0,0,0,4.0,1.2,0.748331,בישראל שילוב לימודי ליבה בחינוך החרדי הוא
51,2.4,1.019804,1.8,0.748331,3.0,0.894427,0,0,0,3.0,1.2,1.16619,היחס בין הרשות השופטת לרשויות האחרות בישראל צריך
21,2.0,0.707107,1.5,0.5,2.5,0.5,6,3,3,1.0,1.0,0.0,מדיניות ההפלות בישראל צריכה
55,2.0,0.942809,1.25,0.433013,2.6,0.8,1,1,0,4.0,1.25,0.433013,התייחסות למועמדים לתפקידי שר או ראש ממשלה בישר...
7,2.0,0.707107,2.0,1.0,2.0,0.0,6,3,3,2.0,1.0,0.0,את הפיקוח על מחירי מוצרי המזון בישראל צריך
36,2.875,0.599479,2.5,0.5,3.25,0.433013,2,1,1,3.0,0.75,0.433013,בישראל הקצאת תקציבים ייעודיים לשימור מסורות של...
38,2.4,0.663325,3.0,0.0,1.8,0.4,0,0,0,5.0,1.2,0.4,הטענה לאפליה עדתית משפיעה על החברה הישראלית באופן


In [18]:
def update_cells_list(worksheet, cells, values):
  """Write `values` into the leading cells of a worksheet range.

  Args:
    worksheet: object exposing `range(cells)` (returning a list of cell objects
      with a writable `value` attribute) and `update_cells(cell_list)`.
    cells: range specification passed straight to `worksheet.range`.
    values: sized sequence of values; the i-th value goes into the i-th cell
      returned for the range.

  Raises:
    AssertionError: if the range holds fewer cells than there are values.
  """
  targets = worksheet.range(cells)
  assert len(targets) >= len(values)
  for cell, value in zip(targets, values):
    cell.value = value
  # All fetched cells are sent back, including any left unmodified.
  worksheet.update_cells(targets)

In [19]:
# Open the per-model results spreadsheet, creating it if it does not exist yet.
results_title = f'Results {evaluation_sheet_name}'
try:
  new_spreadsheet = gc.open(results_title)
except gspread.exceptions.SpreadsheetNotFound:
  new_spreadsheet = gc.create(results_title)

# Get the first worksheet of the spreadsheet
worksheet = new_spreadsheet.sheet1

# Write the header row followed by every DataFrame row (values stringified, as
# before) in a single range fetch + single update instead of two API calls per
# row. The range ends exactly at the last column: the previous
# chr(ord("A") + len(columns)) was one column too far and only works up to Z.
n_cols = len(results_df.columns)
header_values = results_df.columns.values.tolist()
body_values = [
  str(x)
  for _, df_row in results_df.iterrows()
  for x in df_row.values.tolist()
]
last_cell = gspread.utils.rowcol_to_a1(len(results_df) + 1, n_cols)
# worksheet.range yields cells row by row, matching the flattened value order.
update_cells_list(worksheet, f'A1:{last_cell}', header_values + body_values)
print("Spreadsheet created and data saved successfully!")


Spreadsheet created and data saved successfully!


In [20]:
# Sanity check: every metric list should hold the same number of entries.
for metric_name, metric_values in results.items():
  n_entries = len(metric_values)
  print(f"{metric_name}: {n_entries}")

all_mean: 56
all_std: 56
right_mean: 56
right_std: 56
left_mean: 56
left_std: 56
total_failed: 56
right_failed: 56
left_failed: 56
total_disagreement: 56
mean_disagreement: 56
std_disagreement: 56
