In [14]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error

In [15]:
data = pd.read_csv('vgsales_1.csv')
data.dropna(inplace=True)
data

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,13982,Trine 2,PC,2011.0,Platform,Focus Home Interactive,0.00,0.03,0.00,0.01,0.04
1,4269,Gremlins,2600,1983.0,Action,Atari,0.43,0.03,0.00,0.00,0.46
2,5519,Battle Dodge Ball,SNES,1991.0,Sports,Banpresto,0.00,0.00,0.33,0.00,0.33
3,4476,TimeSplitters 2,GC,2002.0,Shooter,Eidos Interactive,0.34,0.09,0.00,0.01,0.44
4,13052,Sherlock Holmes: Crimes & Punishments,PC,2014.0,Adventure,Focus Home Interactive,0.00,0.05,0.00,0.00,0.05
...,...,...,...,...,...,...,...,...,...,...,...
548,13109,ESPN International Winter Sports 2002,GC,2002.0,Sports,Konami Digital Entertainment,0.04,0.01,0.00,0.00,0.05
549,5882,Fossil Fighters (JP sales),DS,2008.0,Role-Playing,Nintendo,0.00,0.00,0.30,0.00,0.30
550,152,Resident Evil 2,PS,1998.0,Action,Virgin Interactive,1.88,1.47,2.02,0.45,5.82
551,1702,Final Fantasy I & II: Dawn of Souls,GBA,2004.0,Role-Playing,Nintendo,0.64,0.24,0.29,0.02,1.19


In [16]:
features = data.drop('Genre', axis=1) 
target = data['Genre']
label_encoder = LabelEncoder()
for column in features.columns:
    if features[column].dtype == 'object':
        features[column] = label_encoder.fit_transform(features[column])

In [17]:
kfold = KFold(n_splits=5, shuffle=True)

In [18]:
for train_index, test_index in kfold.split(features):
    X_train, X_test = features.iloc[train_index], features.iloc[test_index]
    y_train, y_test = target.iloc[train_index], target.iloc[test_index]

    print("\n")

    model_dt = DecisionTreeClassifier()
    model_dt.fit(X_train, y_train)
    y_pred_dt = model_dt.predict(X_test)
    accuracy_dt = accuracy_score(y_test, y_pred_dt)
    print(f"Decision Tree Accuracy: {accuracy_dt}")
    
    model_svm = SVC()
    model_svm.fit(X_train, y_train)
    y_pred_svm = model_svm.predict(X_test)
    accuracy_svm = accuracy_score(y_test, y_pred_svm)
    print(f"SVM Accuracy: {accuracy_svm}")

    model_knn = KNeighborsClassifier()
    model_knn.fit(X_train, y_train)
    y_pred_knn = model_knn.predict(X_test)
    accuracy_knn = accuracy_score(y_test, y_pred_knn)
    print(f"K-Nearest Neighbors Accuracy: {accuracy_knn}")

    model_rf = RandomForestClassifier()
    model_rf.fit(X_train, y_train)
    y_pred_rf = model_rf.predict(X_test)
    accuracy_rf = accuracy_score(y_test, y_pred_rf)
    print(f"Random Forest Accuracy: {accuracy_rf}")

    print("\n")
    print("Набор предсказаний дерева решений: ",y_pred_dt)
    print("Набор предсказаний опорных векторов: ",y_pred_svm)
    print("Набор предсказаний к-ближайших соседей: ",y_pred_knn)
    print("Набор предсказаний рандомного дерева: ",y_pred_rf)
    print("\n")




Decision Tree Accuracy: 0.21296296296296297
SVM Accuracy: 0.21296296296296297
K-Nearest Neighbors Accuracy: 0.1388888888888889
Random Forest Accuracy: 0.32407407407407407


Набор предсказаний дерева решений:  ['Misc' 'Misc' 'Shooter' 'Strategy' 'Adventure' 'Shooter' 'Role-Playing'
 'Platform' 'Shooter' 'Misc' 'Platform' 'Misc' 'Action' 'Misc'
 'Simulation' 'Platform' 'Shooter' 'Role-Playing' 'Role-Playing' 'Racing'
 'Action' 'Simulation' 'Shooter' 'Action' 'Simulation' 'Misc'
 'Role-Playing' 'Racing' 'Fighting' 'Sports' 'Sports' 'Racing' 'Shooter'
 'Platform' 'Role-Playing' 'Platform' 'Fighting' 'Shooter' 'Role-Playing'
 'Misc' 'Simulation' 'Sports' 'Role-Playing' 'Racing' 'Simulation'
 'Role-Playing' 'Action' 'Sports' 'Sports' 'Action' 'Adventure' 'Platform'
 'Action' 'Action' 'Action' 'Shooter' 'Misc' 'Role-Playing' 'Platform'
 'Sports' 'Role-Playing' 'Action' 'Action' 'Role-Playing' 'Strategy'
 'Sports' 'Role-Playing' 'Action' 'Strategy' 'Simulation' 'Action'
 'Racing' 'Action' 'M

In [19]:
#числовая целевая переменная
features_1 = data[['NA_Sales']] 
target_1 = data['NA_Sales']

In [20]:
label_encoder = LabelEncoder()
for column in features_1.columns:
    if features_1[column].dtype == 'object':
        features_1[column] = label_encoder.fit_transform(features_1[column])

In [21]:
for train_index, test_index in kfold.split(features_1):
    X_train, X_test = features_1.iloc[train_index], features_1.iloc[test_index]
    y_train, y_test = target_1.iloc[train_index], target_1.iloc[test_index]

    # Линейная регрессия
    model_linear = LinearRegression()
    model_linear.fit(X_train, y_train)
    y_pred_linear = model_linear.predict(X_test)
    mse_linear = mean_squared_error(y_test, y_pred_linear)
    print(f'Linear Regression MSE: {mse_linear}')

    # Полиномиальная регрессия
    polynomial_features = PolynomialFeatures(degree=2)
    X_train_poly = polynomial_features.fit_transform(X_train)
    X_test_poly = polynomial_features.transform(X_test)

    model_poly = LinearRegression()
    model_poly.fit(X_train_poly, y_train)
    y_pred_poly = model_poly.predict(X_test_poly)
    mse_poly = mean_squared_error(y_test, y_pred_poly)
    print(f'Polynomial Regression MSE: {mse_poly}')

    # Гребневая регрессия
    model_ridge = Ridge()
    model_ridge.fit(X_train, y_train)
    y_pred_ridge = model_ridge.predict(X_test)
    mse_ridge = mean_squared_error(y_test, y_pred_ridge)
    print(f'Ridge Regression MSE: {mse_ridge}')

    # Лассо-регрессия
    model_lasso = Lasso()
    model_lasso.fit(X_train, y_train)
    y_pred_lasso = model_lasso.predict(X_test)
    mse_lasso = mean_squared_error(y_test, y_pred_lasso)
    print(f'Lasso Regression MSE: {mse_lasso}')

    print("\n")
    print("Набор предсказаний линейной регрессии: ",y_pred_linear)
    print("Набор предсказаний полиномиальной регрессии: ",y_pred_poly)
    print("Набор предсказаний гребневой регрессии: ",y_pred_ridge)
    print("Набор предсказаний лассо-регрессии: ",y_pred_lasso)
    print("\n")


Linear Regression MSE: 6.87950094961657e-30
Polynomial Regression MSE: 9.293681942748628e-32
Ridge Regression MSE: 0.0013796825828613186
Lasso Regression MSE: 8.991285074266976


Набор предсказаний линейной регрессии:  [ 4.90000000e-01 -1.94289029e-16  1.90000000e-01  7.00000000e-02
  3.00000000e-02  6.50000000e-01  2.10000000e-01  8.00000000e-02
  5.10000000e-01  2.23000000e+00  2.90000000e-01  1.00000000e-02
 -1.94289029e-16  4.00000000e-02 -1.94289029e-16  1.00000000e-01
  9.00000000e-02  4.10000000e-01  3.90000000e-01  3.00000000e-02
  2.70000000e-01  2.00000000e-02  3.00000000e-02  2.00000000e-01
  1.80000000e-01  3.80000000e-01  1.00000000e-02  1.06000000e+00
  3.60000000e-01  3.70000000e-01  2.50000000e-01  1.10000000e-01
 -1.94289029e-16  5.20000000e-01  2.20000000e-01  9.00000000e-02
  1.80000000e-01  1.10000000e-01  3.00000000e-02 -1.94289029e-16
  1.00000000e-02  1.10000000e-01  2.00000000e-02 -1.94289029e-16
  7.00000000e-02  3.00000000e-02  1.20000000e-01 -1.94289029e-16
 