In [63]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.tree import export_graphviz
from six import StringIO
from IPython.display import Image  
import pydotplus

scaler = StandardScaler()

In [64]:
bike = pd.read_csv('bike.csv')
bike.head()

Unnamed: 0.1,Unnamed: 0,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,hum,windspeed,cnt
0,0,1,0,1,0,6,0,2,0.344167,0.805833,0.160446,985
1,1,1,0,1,0,0,0,2,0.363478,0.696087,0.248539,801
2,2,1,0,1,0,1,1,1,0.196364,0.437273,0.248309,1349
3,3,1,0,1,0,2,1,1,0.2,0.590435,0.160296,1562
4,4,1,0,1,0,3,1,1,0.226957,0.436957,0.1869,1600


In [65]:
X = bike.iloc[:, 1:11]
feature_cols = ['season', 'yr', 'mnth', 'holiday','weekday','workingday','weathersit', 'temp', 'hum', 'windspeed']
Y = bike.iloc[:, 11]
y_label = np.where(Y > 4500, 1, 0)
X_scaled = scaler.fit_transform(X)

In [66]:
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

rental_bins = pd.qcut(Y, 
                      q = 3, 
                      labels = ['low', 'medium', 'high'])

In [67]:
X_train, X_test, Y_train, Y_test = train_test_split(X_pca, 
                                                    rental_bins, 
                                                    test_size = 0.2, 
                                                    random_state = 42)
X_trainDF = pd.DataFrame(X_train)
X_trainDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 584 entries, 0 to 583
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       584 non-null    float64
 1   1       584 non-null    float64
 2   2       584 non-null    float64
 3   3       584 non-null    float64
 4   4       584 non-null    float64
 5   5       584 non-null    float64
 6   6       584 non-null    float64
 7   7       584 non-null    float64
 8   8       584 non-null    float64
 9   9       584 non-null    float64
dtypes: float64(10)
memory usage: 45.8 KB


In [68]:
clf = DecisionTreeClassifier()

clf = clf.fit(X_train, Y_train)

Y_pred = clf.predict(X_test)

In [69]:
print("Accuracy:", 
      metrics.accuracy_score(Y_test, Y_pred))

Accuracy: 0.7619047619047619


In [74]:
dot_data = StringIO()

export_graphviz(clf, 
                out_file = dot_data,  
                filled=True, 
                rounded=True,
                special_characters = True,
                feature_names = feature_cols,
                class_names = ['0','1', '2'])

In [None]:
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())  
graph.write_png('bikes.png')
Image(graph.create_png())