# Predicting Tennis Matches with Support Vector Machines

In [1]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score
from scipy.stats import uniform
from sklearn.svm import SVC
import config.ConnectionConfig as cc

In [2]:
# Load a previously persisted scaler object from disk.
# NOTE(review): the file name suggests this scaler was saved for a neural-network
# model — confirm it is the intended scaler for the SVM pipeline. It is re-fitted
# later via `scaler.fit_transform(X)`, so only its configuration is reused.
scaler = joblib.load('../models/tennis_nn_scaler.pkl')

In [3]:
# Prepare the environment and start a Spark session through the project-local
# `config.ConnectionConfig` helper (imported as `cc`).
# NOTE(review): what `setupEnvironment` configures is not visible here — presumably
# Spark/Java environment variables; verify against the helper module.
cc.setupEnvironment()
# Session handle used below to read the training CSV.
spark = cc.startLocalCluster("Tennis Predictions")
# Bare expression: as the last line of the cell its value is displayed by the notebook.
spark.getActiveSession()

In [4]:
# Read the training CSV with Spark (first row is the header, column types are
# inferred), then materialise it as a pandas DataFrame for scikit-learn.
spark_df = spark.read.csv(
    '../data/final_train_df_spark.csv',
    header=True,
    inferSchema=True,
)
data = spark_df.toPandas()
# Display the loaded frame in the notebook.
data

In [5]:
# Columns excluded from the SVM feature set: the four surface indicator columns
# plus per-player total time, ranking and round value.
dropped_columns = [
    'Surface_Carpet',
    'Surface_Clay',
    'Surface_Grass',
    'Surface_Hard',
    'Total time1',
    'Total time2',
    'Ranking at that time1',
    'Ranking at that time2',
    'round value1',
    'round value2',
]
data = data.drop(columns=dropped_columns)

In [6]:
# Every column except the last is a feature; the last column is the target.
X, y = data.iloc[:, :-1], data.iloc[:, -1]
# Display both shapes as a sanity check.
(X.shape, y.shape)

In [7]:
# Re-fit the loaded scaler on the feature matrix and wrap the transformed values
# back into a DataFrame so the original column names are kept.
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
# Display the scaled features in the notebook.
X_scaled

In [8]:
# Hold out 20% of the rows for evaluation (fixed seed for a reproducible split).
#
# The split is done on the raw features `X`, and the scaler is then fitted on
# the training rows only. Fitting the scaler on the full data set before
# splitting lets test-set statistics leak into the training features and
# makes the reported accuracy optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)
# The held-out rows are only transformed with the training-set statistics.
X_test = pd.DataFrame(
    scaler.transform(X_test),
    columns=X_test.columns,
    index=X_test.index,
)
# Display the resulting shapes as a sanity check.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [9]:
# Train a support vector classifier with default hyperparameters.
# `probability=True` enables `predict_proba`; scikit-learn obtains those
# probabilities through an internal randomised cross-validation, so a fixed
# `random_state` (same seed as the train/test split) keeps them reproducible
# between runs.
svm_clf = SVC(probability=True, random_state=42)
svm_clf.fit(X_train, y_train)

In [10]:
# Predict labels for the held-out rows and report the fraction predicted correctly.
y_pred = svm_clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

In [12]:
# Persist the fitted classifier so it can be reloaded without retraining.
# NOTE(review): the scaler re-fitted earlier in this notebook is not saved in the
# visible code — confirm that inference code has access to a matching scaler.
joblib.dump(svm_clf, '../models/tennis_svm_model.pkl')