# This notebook runs the Kaggle validation dataset through the prediction API. It is used to test both the single-call and multiple-call endpoints.

In [1]:
import pandas as pd
import requests
import datetime as dt
import json

In [2]:
# Read the Kaggle validation CSV; the first column of the file becomes the DataFrame index.
data_path = '/home/jovyan/work/data/test.csv'
kaggle_df = pd.read_csv(
    filepath_or_buffer=data_path,
    index_col=0,
)

## First, test the single-call endpoint by using `DataFrame.apply`

In [None]:
# URL of the single-prediction endpoint; passed to single_test for every row.
turl = 'http://192.168.86.44:8002/singletitanicpred'

In [None]:
def single_test(df, turl):
    """Request a prediction for one passenger from the single-call endpoint.

    Designed to be used with ``DataFrame.apply(..., axis=1)``, so despite its
    name ``df`` is a single row rather than a whole DataFrame.

    Parameters
    ----------
    df : row-like (e.g. pandas Series)
        Must provide 'Pclass', 'Sex', 'SibSp', 'Parch', 'Fare', 'Embarked',
        'Age' and 'PassengerId'. Values must be JSON-serialisable, so NaN
        has to be replaced before calling.
    turl : str
        URL of the single-prediction endpoint.

    Returns
    -------
    The value stored under 'prod_prediciton_binary' in the JSON response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    """
    # Row column name -> field name used in the JSON payload.
    field_map = {
        'Pclass': 'pclass',
        'Sex': 'sex',
        'SibSp': 'sibsp',
        'Parch': 'parch',
        'Fare': 'fare',
        'Embarked': 'embarked',
        'Age': 'age',
        'PassengerId': 'passid',
    }
    my_values = {api_name: df[column] for column, api_name in field_map.items()}

    pred_responce = requests.post(turl, json=my_values)
    # Fail with the real HTTP error instead of an unrelated JSON/KeyError below.
    pred_responce.raise_for_status()

    # The key is spelled 'prediciton' (sic) in the API response; do not "fix" it here.
    return pred_responce.json()['prod_prediciton_binary']

In [None]:
# FastAPI does not currently handle NaN values: even though there is server-side
# validation, a request containing NaN is rejected. Replace missing values with
# sentinel values before sending.
kaggle_df['Fare'] = kaggle_df['Fare'].fillna(-1)
kaggle_df['Embarked'] = kaggle_df['Embarked'].fillna('NA')
# Use the number -1 (not the string '-1') so Age stays a numeric column and the
# sentinel matches the one used for Fare.
kaggle_df['Age'] = kaggle_df['Age'].fillna(-1)
# single_test reads PassengerId as a regular column. Only move it out of the index
# when it is still there, so re-running this cell does not add a stray 'index' column.
if 'PassengerId' not in kaggle_df.columns:
    kaggle_df = kaggle_df.reset_index()

In [None]:
# Time one API request per passenger: single_test is applied row by row and its
# return value is stored in the 'Prediction' column.
tic = dt.datetime.now()
kaggle_df['Prediction'] = kaggle_df.apply(lambda row: single_test(row, turl), axis=1)
toc = dt.datetime.now()
print(f"Total Time took {toc - tic}")

In [None]:
# Show the first ten rows of kaggle_df as a spot check.
kaggle_df.head(10)

### Total time was about 4 seconds when calling the API once per row. This slow response is due to the numerous API calls. It would be faster to make one API call and send all the data at once.

In [17]:
# Batch test: send every passenger to the multi-prediction endpoint in one request.
tic = dt.datetime.now()

# Work on a copy and make sure PassengerId is a regular column. It is the index
# when kaggle_df comes straight from read_csv, but it is already a column if the
# index was reset earlier in the notebook, so only reset when needed.
tempcopy = kaggle_df.copy()
if 'PassengerId' not in tempcopy.columns:
    tempcopy = tempcopy.reset_index()

# Keep only the fields the API uses and rename them to the API's field names.
api_columns = {
    'Pclass': 'pclass',
    'Sex': 'sex',
    'SibSp': 'sibsp',
    'Parch': 'parch',
    'Fare': 'fare',
    'Embarked': 'embarked',
    'Age': 'age',
    'PassengerId': 'passid',
}
apidf = tempcopy[list(api_columns)].rename(columns=api_columns)

# FastAPI does not currently handle NaN values: even though there is server-side
# validation, a request containing NaN is rejected. Replace missing values with
# sentinel values before sending.
apidf['fare'] = apidf['fare'].fillna(-1)
apidf['embarked'] = apidf['embarked'].fillna('NA')
# Age is sent as a number: coerce first (an earlier cell may have left '-1' strings
# in the column), then use the same numeric -1 sentinel as fare.
apidf['age'] = pd.to_numeric(apidf['age']).fillna(-1)
apidf_json = apidf.to_json(orient='records')

# Send data to URL
xurl = 'http://192.168.86.44:8002/multititanicpred'  # API URL
# The body is an already-serialised JSON string, so declare its content type explicitly.
xurl_responce = requests.post(
    xurl,
    data=apidf_json,
    headers={'Content-Type': 'application/json'},
)
# Fail with the real HTTP error instead of an unrelated JSON/KeyError below.
xurl_responce.raise_for_status()
results = xurl_responce.json()['results']

# Turn the results into a DataFrame indexed by PassengerId.
responce_df = (
    pd.DataFrame(results)
    .rename(columns={'passid': 'PassengerId'})
    .set_index('PassengerId')
)

# Merge with the original data. Both sides are indexed by PassengerId so the join
# aligns on passenger; joining a frame that still has a 0..n RangeIndex would
# match no rows and leave every prediction empty.
newdf = tempcopy.set_index('PassengerId').join(responce_df)

toc = dt.datetime.now()
print(f"Total Time took {toc - tic}")
newdf.head()

Total Time took 0:00:00.238775


Unnamed: 0_level_0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,prod_prediciton_binary,prod_prediction_word
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q,0,Did Not Survive
893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S,0,Did Not Survive
894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q,0,Did Not Survive
895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S,0,Did Not Survive
896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S,0,Did Not Survive


### By passing the whole list in a single request instead of one row at a time, getting the results took less than half a second.