In [None]:
import pandas as pd

# Raw-gist URL of the tracking CSV. The path carries a long hex segment after
# /raw/, so it is presumably pinned to one fixed revision of the file.
uri = "https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv"
# Download and parse the CSV into a DataFrame (requires network access).
data = pd.read_csv(uri)
# Preview the first rows as a quick check that the columns parsed as expected.
data.head()

In [5]:
# Columns fed to the model as inputs; kept in a named list so the selection
# is easy to read and adjust.
feature_columns = ['home', 'how_it_works', 'contact']
X = data[feature_columns]

# Column the model is trained to predict.
y = data['bought']

In [6]:
# Display the (rows, columns) dimensions of the loaded DataFrame.
data.shape

(99, 4)

In [None]:
# Manual hold-out split by row position: everything before TRAIN_ROWS is used
# for training, everything from TRAIN_ROWS onward for testing. No shuffling
# is done here, so the split follows the row order of the source data.
TRAIN_ROWS = 75

X_train, X_test = X.iloc[:TRAIN_ROWS], X.iloc[TRAIN_ROWS:]
y_train, y_test = y.iloc[:TRAIN_ROWS], y.iloc[TRAIN_ROWS:]

print(f'Training with {len(X_train)} and testing with {len(X_test)} elements')

In [12]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

# Seed for the classifier. LinearSVC shuffles the data internally when it
# solves the dual problem, so without a fixed random_state the reported
# accuracy can change from one run to the next. The value matches the SEED
# assigned in the later cells of this notebook.
SEED = 20

# Fit a linear SVM on the training rows, then predict the held-out rows.
model = LinearSVC(random_state=SEED)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# accuracy_score returns a fraction of correct predictions; scale it to a
# percentage for display.
accuracy = accuracy_score(y_test, predictions) * 100
print(f'Accuracy was {accuracy:.2f}')

Accuracy was 95.83


# Using a library to split the data into training and test sets

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

# One seed shared by the split and the classifier so the whole cell gives the
# same result on every run.
SEED = 20

# Random split with 25% of the rows held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=SEED, test_size=0.25
)

# random_state also has to be passed to LinearSVC: its solver shuffles the
# data internally, so seeding only the split leaves the accuracy run-dependent.
model = LinearSVC(random_state=SEED)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# accuracy_score returns a fraction of correct predictions; scale it to a
# percentage for display.
accuracy = accuracy_score(y_test, predictions) * 100
print(f'Accuracy was {accuracy:.2f}')

Accuracy was 96.00


In [26]:
# Print how many samples of each class landed in the training target and then
# in the test target, to see whether the split kept the classes balanced.
for labels in (y_train, y_test):
    print(labels.value_counts())

0    47
1    27
Name: bought, dtype: int64
0    19
1     6
Name: bought, dtype: int64


## Stratifying the split by the target `y`

In [29]:
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

# One seed shared by the split and the classifier so the whole cell gives the
# same result on every run.
SEED = 20

# Random split with 25% of the rows held out for testing. stratify=y asks
# train_test_split to keep the class proportions of y as close as possible in
# both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=SEED, test_size=0.25, stratify=y
)

# random_state also has to be passed to LinearSVC: its solver shuffles the
# data internally, so seeding only the split leaves the accuracy run-dependent.
model = LinearSVC(random_state=SEED)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# accuracy_score returns a fraction of correct predictions; scale it to a
# percentage for display.
accuracy = accuracy_score(y_test, predictions) * 100
print(f'Accuracy was {accuracy:.2f}')

Accuracy was 96.00


In [30]:
# Per-class sample counts for the training target followed by the test target,
# to confirm the effect of the stratified split on class proportions.
print(y_train.value_counts(), y_test.value_counts(), sep='\n')

0    49
1    25
Name: bought, dtype: int64
0    17
1     8
Name: bought, dtype: int64
