In [1]:
import pandas as pd

In [90]:
def get_qas(df: pd.DataFrame, leave_id_out: bool = True) -> pd.DataFrame:
  """
  Pairs every question with each of its answers.

  Rows whose 'ParentId' is missing are treated as questions, all other rows
  as answers. Answers are inner-joined to questions on
  answer 'ParentId' == question 'Id', so questions without any answer (and
  answers whose parent is not in the frame) do not appear in the result.

  Only the 'Id', 'ParentId', 'Body' and 'Score' columns are read; any other
  column may be present or absent. The input frame is not modified.

    Args:
        df (DataFrame): The given DataFrame.
        leave_id_out (bool): If True, the 'question_id' column is left out
            of the result.

    Returns:
        DataFrame: one row per (question, answer) pair with the columns
        'question_id' (only when leave_id_out is False), 'question',
        'answer' and 'score' (the answer's score).

    Raises:
        KeyError: if df lacks 'Id', 'ParentId', 'Body' or 'Score'.
  """
  is_answer = df['ParentId'].notna()

  # Select just what is needed instead of dropping a fixed list of "unneeded"
  # columns: dropping fails with KeyError as soon as one of them is absent.
  questions = df.loc[~is_answer, ['Id', 'Body']].rename(
      columns={'Id': 'question_id', 'Body': 'question'})
  answers = df.loc[is_answer, ['ParentId', 'Body', 'Score']].rename(
      columns={'Body': 'answer', 'Score': 'score'})

  # inner join; 'ParentId' is only the join key and is discarded afterwards
  merged = questions.merge(answers, left_on='question_id', right_on='ParentId')
  merged = merged[['question_id', 'question', 'answer', 'score']]

  if leave_id_out:
    merged = merged.drop(columns=['question_id'])

  return merged

def normalize_scores(df: pd.DataFrame, leave_max_out: bool = True) -> pd.DataFrame:
  """
  Normalizes scores based on the highest scored answer per question.

  Rows are grouped by the text in the 'question' column, and every 'score'
  is divided by the largest 'score' in its group. The raw 'score' column is
  replaced by the normalized one, which is placed last.

  The input frame is left untouched; a new DataFrame is returned.

  Note: the division is not guarded. A group whose best score is 0 yields
  NaN/inf values, and a group whose best score is negative yields ratios
  of 1 or more.

    Args:
        df (DataFrame): The given DataFrame; needs 'question' and 'score'
            columns.
        leave_max_out (bool): If True, the per-question 'max_score' column
            is left out of the result.

    Returns:
        DataFrame: the normalized DataFrame.
  """
  # best score per question, aligned row-by-row with df
  max_score = df.groupby('question')['score'].transform('max')

  # normalize
  normalized = df['score'] / max_score

  # Build the result on a fresh frame rather than assigning helper columns
  # onto `df`, which would leak 'max_score'/'normalized_score' to the caller.
  result = df.drop(columns=['score'])

  if not leave_max_out:
    result = result.assign(max_score=max_score)

  return result.assign(score=normalized)

In [95]:
# Latin Stack Exchange: read the parsed dump from the GitHub-hosted CSV
df = pd.read_csv(
    'https://github.com/yarathealmighty/dumps/blob/main/data/parsed/latin.csv?raw=True'
)

# pair questions with their answers, then scale each answer's score by the
# best score among the answers to the same question
latin = normalize_scores(get_qas(df))
latin

Unnamed: 0,question,answer,score
0,I was taught that one can use the '-que' suffi...,"The way I was taught was that, as a general ru...",1.000000
1,I was taught that one can use the '-que' suffi...,"In Ecclesiastical Latin ""-que"" would be used i...",0.340909
2,I was taught that one can use the '-que' suffi...,"Both et and -que can often translate ""and"". Th...",0.250000
3,I was taught that one can use the '-que' suffi...,James Kingsbery's answer is exactly correct. I...,0.159091
4,I was taught that one can use the '-que' suffi...,The que suffix has a usage example with moment...,0.045455
...,...,...,...
9328,"So, I'm a PhD student working on the history o...",I would advocate for a freer translation: I th...,0.250000
9329,I came across this on google translate and I l...,"No. Anima is the Latin word for soul, apart fr...",1.000000
9330,"In a previous question of mine, What diphthong...","It isn’t traditionally used in normal writing,...",1.000000
9331,"In a previous question of mine, What diphthong...","Most of them don't need to be marked, since th...",0.500000
