In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the iris dataset from the local CSV file
file_path = "C:/Users/wjdqh/injige/iris.csv"
df = pd.read_csv(file_path)

# The 'Name' column is the target; every remaining column is a feature
TARGET_COLUMN = 'Name'
X = df.drop(columns=[TARGET_COLUMN])

# Convert the string class labels into integer codes (label encoding)
encoder = LabelEncoder()
y = encoder.fit_transform(df[TARGET_COLUMN])

# Split the data: 80% training, 20% test, stratified on the encoded labels
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardize the features (used for SVM and Logistic Regression).
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics from the test rows enter the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the models to compare; every estimator is constructed with the
# same random_state value
MODEL_SEED = 42
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=MODEL_SEED),
    "Random Forest": RandomForestClassifier(random_state=MODEL_SEED),
    "SVM": SVC(random_state=MODEL_SEED),
    "Logistic Regression": LogisticRegression(
        max_iter=200,
        random_state=MODEL_SEED,
    ),
}

# Train every model and record its accuracy on the test split
scaled_features = (X_train_scaled, X_test_scaled)
raw_features = (X_train, X_test)
results = {}
for name, model in models.items():
    # SVM and Logistic Regression are fed the standardized features;
    # the remaining models are fed the original features
    uses_scaled = name in ("SVM", "Logistic Regression")
    train_features, test_features = scaled_features if uses_scaled else raw_features

    model.fit(train_features, y_train)
    y_pred = model.predict(test_features)
    results[name] = accuracy_score(y_test, y_pred)

# Print the results: one line per model, accuracy shown as a percentage.
# Note: in this loop `model` is the model's name (the dict key), not the
# estimator object.
for model, accuracy in results.items():
    print(f"{model}: 정확도 {accuracy * 100:.2f}%")




Decision Tree: 정확도 93.33%
Random Forest: 정확도 90.00%
SVM: 정확도 96.67%
Logistic Regression: 정확도 93.33%
