In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# Load the raw data. The file is read without a header row, so pandas assigns
# integer column labels that are replaced with attribute names below.
df = pd.read_csv('car.data', header=None)

# Attribute names: six categorical features followed by the target column.
# Assigning `df.columns` (rather than passing `names=` to read_csv) keeps the
# length check, so a file with an unexpected number of columns fails loudly.
feature_columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
target_column = 'class'
df.columns = feature_columns + [target_column]

# One-hot encode the categorical features; the target column is left untouched.
df_encoded = pd.get_dummies(df, columns=feature_columns)

# Prepare features and target
X = df_encoded.drop(columns=target_column)
y = df_encoded[target_column]

# Split into train and test sets. `stratify=y` keeps the class proportions the
# same in both splits; `random_state` makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit logistic regression model.
# `multi_class` is intentionally not passed: it is deprecated as of
# scikit-learn 1.5, and with the lbfgs solver a multiclass target is already
# fitted as a multinomial model by default (scikit-learn >= 0.22).
model = LogisticRegression(max_iter=1000, solver='lbfgs')
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)

# F1 score for each category, reported in the order of `model.classes_` so the
# scores can be paired with their class labels.
f1_per_class = f1_score(y_test, y_pred, average=None, labels=model.classes_)
print("F1 score per class:", dict(zip(model.classes_, f1_per_class)))

# Overall (macro) F1 score: the unweighted mean of the per-class F1 scores.
f1_macro = f1_score(y_test, y_pred, average='macro')
print("Overall (macro) F1 score:", f1_macro)


F1 score per class: {'acc': np.float64(0.782051282051282), 'good': np.float64(0.56), 'unacc': np.float64(0.9608247422680413), 'vgood': np.float64(0.8461538461538461)}
Overall (macro) F1 score: 0.7872574676182924


