In [1]:
import pandas as pd

# Read the raw questionnaire export.
df = pd.read_csv('D:/inductive_deductive/dataset/inductive_deductive_dataset.csv')

# Keep only the columns whose header does not start with "Unnamed".
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

# Zero-based column positions of the questions that feed each score.
inductive_indices = [0, 2, 4, 6, 8, 9, 12]    # Q1, Q3, Q5, Q7, Q9, Q10, Q13
deductive_indices = [1, 3, 5, 7, 10, 11, 13]  # Q2, Q4, Q6, Q8, Q11, Q12, Q14

# Answer text -> points (1..4). The mapping is applied to every remaining
# column; any cell that is not one of these four answers ends up as 0.
score_map = dict(zip(('Never', 'Sometimes', 'Usually', 'Always'), range(1, 5)))
df = df.apply(lambda answers: answers.map(score_map).fillna(0))
df = df.astype(int)

# Resolve the positional indices to column names. This is done before the
# score columns are appended, so positions refer to the original columns.
questions = list(df.columns)
inductive_qs = df.columns[inductive_indices].tolist()
deductive_qs = df.columns[deductive_indices].tolist()

# Per-respondent totals for each group of questions.
df['inductive_score'] = df.loc[:, inductive_qs].sum(axis='columns')
df['deductive_score'] = df.loc[:, deductive_qs].sum(axis='columns')

# Discard respondents whose two totals are equal (ties).
df = df[df['inductive_score'].ne(df['deductive_score'])]

# Assign labels
def assign_label(row):
    """Return the label for one scored response.

    ``row`` must support lookup by the keys ``'inductive_score'`` and
    ``'deductive_score'``. The result is ``'Inductive'`` when the inductive
    score is strictly greater than the deductive score, and ``'Deductive'``
    in every other case (including equal scores).
    """
    inductive, deductive = row['inductive_score'], row['deductive_score']
    return 'Inductive' if inductive > deductive else 'Deductive'
# Label every remaining row with the same rule as assign_label(): 'Inductive'
# when inductive_score is strictly greater, otherwise 'Deductive'.
# This is done column-wise rather than with df.apply(assign_label, axis=1)
# because row-wise apply on a frame with no rows returns a DataFrame instead
# of a Series, which makes the column assignment raise ValueError.
is_inductive = df['inductive_score'].gt(df['deductive_score'])
df['label'] = is_inductive.map({True: 'Inductive', False: 'Deductive'})

# Print how many rows received each label.
print(df['label'].value_counts())

# Save the processed and labeled data to a new CSV (row index is not written).
df.to_csv('D:/inductive_deductive/backend/auto_labeled_responses.csv', index=False)
print("✅ Labeled data saved to 'auto_labeled_responses.csv'")


label
Inductive    91
Deductive    78
Name: count, dtype: int64
✅ Labeled data saved to 'auto_labeled_responses.csv'
