In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
import matplotlib.pyplot as plt

# The raw UCI file has no header row, so the column names are supplied here.
# The first column holds the class label; the remaining thirteen are features.
wine_column_names = [
    'Class label',
    'Alcohol',
    'Malic acid',
    'Ash',
    'Alcalinity of ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
    'Nonflavanoid phenols',
    'Proanthocyanins',
    'Color intensity',
    'Hue',
    'OD280/OD315 of diluted wines',
    'Proline',
]
df_wine = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data',
    header=None,
    names=wine_column_names,
)

# Show which distinct class labels are present, then preview the first rows.
class_labels = np.unique(df_wine['Class label'])
print('Class labels', class_labels)
df_wine.head()

In [None]:
# Column 0 is the class label; every other column is a feature.
X = df_wine.iloc[:, 1:].values
y = df_wine.iloc[:, 0].values

# Hold out 30% of the rows for testing; stratify on y so both partitions keep
# the class proportions of the full dataset. The seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
    stratify=y,
)

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits so the test data never influences
# the scaler.
stdsc = StandardScaler()
stdsc.fit(X_train)
X_train_std = stdsc.transform(X_train)
X_test_std = stdsc.transform(X_test)

In [None]:
# L1-regularised logistic regression on the standardised features.
# The solver is named explicitly: the default solver in current scikit-learn
# (lbfgs) does not support the L1 penalty and raises a ValueError at fit time,
# whereas liblinear does support it.
# NOTE(review): newer scikit-learn releases may warn about liblinear on
# multiclass targets; solver='saga' is the other L1-capable option if that
# becomes a problem in your environment.
lr = LogisticRegression(penalty='l1', C=1.0, solver='liblinear')
lr.fit(X_train_std, y_train)

# Compare accuracy on the data the model was fitted to against the held-out split.
print('Training accuracy: ', lr.score(X_train_std, y_train))
print('Test accuracy: ', lr.score(X_test_std, y_test))

In [None]:
# Show the fitted parameters of the logistic regression model:
# first the intercept term(s), then the coefficient matrix.
for fitted_param in (lr.intercept_, lr.coef_):
    print(fitted_param)

In [None]:
# Feature names, in the same order as the columns of X (everything after the
# class-label column). Indexing the DataFrame with [1:] would slice rows, not
# column names, so the column Index is used instead.
feat_labels = df_wine.columns[1:]

# Tree ensembles do not need standardised inputs, so the raw training split is used.
forest = RandomForestClassifier(n_estimators=500, random_state=1)
forest.fit(X_train, y_train)

importances = forest.feature_importances_
# Feature indices ordered from the most to the least important.
indicies = np.argsort(importances)[::-1]

n_features = X_train.shape[1]

plt.title('Feature Importance')
plt.bar(range(n_features), importances[indicies], align='center')
# The labels are permuted with the same ordering as the bars so that every
# tick names the feature whose importance is drawn above it.
plt.xticks(range(n_features), feat_labels[indicies], rotation=90)
plt.xlim([-1, n_features])
plt.tight_layout()
plt.show()

In [None]:
# Reuse the already-fitted forest (prefit=True) and keep only the features
# whose importance reaches the 0.1 threshold.
sfm = SelectFromModel(forest, threshold=0.1, prefit=True)
X_selected = sfm.transform(X_train)

# The selector drops columns (features), not rows, so the meaningful count is
# the number of columns left after the transform.
print('Number of features that met the criteria: ', X_selected.shape[1])
