In [9]:
import os

import numpy as np
import pandas as pd

In [10]:
# Load the preprocessed inputs: the two ratings splits and the personality table.
# The paths are read in the order listed and unpacked into the matching names.
df_ratings_test, df_ratings_train, df_personality = map(
    pd.read_csv,
    (
        'data/preprocessed/ratings-test.csv',
        'data/preprocessed/ratings-train.csv',
        'data/preprocessed/personality.csv',
    ),
)

In [11]:
# The sixteen MBTI type codes. A code's position in this list is the index of
# its slot in the one-hot vector, and the list also supplies the names of the
# one-hot columns written to the DataFrames.
mbti_types = [
    'INTJ', 'INTP', 'ENTJ', 'ENTP',
    'INFJ', 'INFP', 'ENFJ', 'ENFP',
    'ISTJ', 'ISFJ', 'ESTJ', 'ESFJ',
    'ISTP', 'ISFP', 'ESTP', 'ESFP',
]
# Length of the one-hot vector (one slot per MBTI type).
num_mbti_types = len(mbti_types)

In [12]:
# Convert a row of personality scores to a one-hot encoded MBTI type
def convert_to_mbti(row):
    """Return the one-hot MBTI encoding for one row of OCEAN scores.

    The five trait values are read from ``row``, turned into a four-letter
    MBTI code by ``convert_ocean_to_mbti`` and encoded as a vector with a
    single 1 at the code's position in ``mbti_types``.

    The input ``row`` is not modified: pandas does not support functions
    that mutate the object handed to them by ``DataFrame.apply``, and a
    value written onto the row would not reach the DataFrame anyway because
    only the returned Series is used.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            'openness', 'agreeableness', 'neuroticism',
            'conscientiousness' and 'extraversion'.

    Returns:
        pandas.Series: ``num_mbti_types`` values indexed by ``mbti_types``;
        1 for the row's MBTI type and 0 everywhere else (floating point,
        since the vector is built with ``np.zeros``).

    Raises:
        KeyError: if ``row`` lacks one of the five trait keys.
        ValueError: if the computed code is not present in ``mbti_types``.
    """
    # All five traits are forwarded, although the current mapping in
    # convert_ocean_to_mbti does not read 'neuroticism'.
    ocean_scores = {
        'openness': row['openness'],
        'agreeableness': row['agreeableness'],
        'neuroticism': row['neuroticism'],
        'conscientiousness': row['conscientiousness'],
        'extraversion': row['extraversion']
    }

    mbti_type = convert_ocean_to_mbti(ocean_scores)

    # One-hot encode: the type's position in mbti_types selects the slot.
    mbti_vector = np.zeros(num_mbti_types)
    mbti_vector[mbti_types.index(mbti_type)] = 1

    return pd.Series(mbti_vector, index=mbti_types)

In [13]:
# Function to convert OCEAN scores to an MBTI type code
def convert_ocean_to_mbti(ocean_scores, threshold=3.5):
    """Map Big Five (OCEAN) trait scores to a four-letter MBTI type code.

    Each MBTI axis is decided by a single trait. A score at or above
    ``threshold`` selects the first letter of the pair; any other value
    selects the second (this includes NaN, for which ``>=`` is False):

        extraversion      -> E / I
        openness          -> N / S
        agreeableness     -> F / T
        conscientiousness -> J / P

    Neuroticism is not used by this mapping.

    Args:
        ocean_scores: Mapping (e.g. dict) with the keys 'extraversion',
            'openness', 'agreeableness' and 'conscientiousness'. Extra
            keys are ignored.
        threshold: Cut-off applied to every trait. Defaults to 3.5, the
            value that was previously hard-coded for each axis.

    Returns:
        str: The MBTI code, letters ordered E/I, N/S, F/T, J/P
        (for example 'ENFJ').

    Raises:
        KeyError: if one of the four required traits is missing.
    """
    # (trait, letter when score >= threshold, letter otherwise), listed in
    # the order the letters appear in the resulting code.
    axes = (
        ('extraversion', 'E', 'I'),
        ('openness', 'N', 'S'),
        ('agreeableness', 'F', 'T'),
        ('conscientiousness', 'J', 'P'),
    )
    return ''.join(
        high if ocean_scores[trait] >= threshold else low
        for trait, high, low in axes
    )

In [14]:
# Run the row-wise conversion on each DataFrame and store the resulting
# one-hot columns (one per entry of mbti_types) on that same DataFrame.
for frame in (df_ratings_test, df_ratings_train, df_personality):
    frame[mbti_types] = frame.apply(convert_to_mbti, axis=1)

In [15]:
# Save the CSV files in data/mbti
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists first (a no-op when it is already there).
os.makedirs('data/mbti', exist_ok=True)
df_ratings_test.to_csv('data/mbti/ratings-test.csv', index=False)
df_ratings_train.to_csv('data/mbti/ratings-train.csv', index=False)
df_personality.to_csv('data/mbti/personality.csv', index=False)