In [None]:
import pandas as pd
from datetime import datetime
from sklearn.neighbors import NearestNeighbors
import warnings
warnings.filterwarnings('ignore')

In [None]:
def time_to_float(time_str):
    """Convert a 24-hour ``HH:MM`` clock string into fractional hours.

    Parameters
    ----------
    time_str : str
        Time of day formatted as ``'%H:%M'``, e.g. ``"06:30"``.

    Returns
    -------
    float
        Hours since midnight in the range [0, 24); ``"06:30"`` gives ``6.5``.

    Raises
    ------
    ValueError
        If ``time_str`` does not match the ``'%H:%M'`` format.
    """
    parsed = datetime.strptime(time_str, '%H:%M')
    # minutes contribute their share of an hour
    return parsed.hour + parsed.minute / 60

In [None]:
def handle_categorical(df):
    """Return a copy of ``df`` whose non-numeric columns are made numeric.

    * ``"Sport Type"`` and ``"Weekday"`` are each replaced by one indicator
      column per distinct value (via ``pd.get_dummies``); the indicator
      columns are appended on the right, sport types first.
    * ``"Start Time"`` is replaced by ``"Start Time in float"``, computed
      with ``time_to_float``.

    The frame passed in is not modified: the first ``join`` already yields
    a new DataFrame, and every later step works on that new object.
    """
    encoded = df
    # One-hot encode each categorical column, then drop the original.
    for column in ("Sport Type", "Weekday"):
        indicators = pd.get_dummies(encoded[column])
        encoded = encoded.join(indicators).drop(column, axis=1)

    # Clock time -> fractional hours.
    encoded["Start Time in float"] = encoded["Start Time"].apply(time_to_float)
    return encoded.drop("Start Time", axis=1)

In [None]:
def generate_input(weekday, sport_type, start_time, duration, calories_burned,
                   columns=None):
    """Encode one workout as a feature vector aligned with the training frame.

    The vector has one entry per column of the encoded frame *except the
    first column*, in column order. The indicator columns named by
    ``weekday`` and ``sport_type`` are set to 1, the three numeric features
    are filled from the arguments, and every other column is 0.

    Parameters
    ----------
    weekday : str
        Name of the weekday indicator column to switch on, e.g. ``"Sunday"``.
    sport_type : str
        Name of the sport indicator column to switch on, e.g. ``"Basketball"``.
    start_time : str
        Start time as ``HH:MM``; converted with ``time_to_float``.
    duration : number
        Value for the ``"Duration (min)"`` column.
    calories_burned : number
        Value for the ``"Calories Burned"`` column.
    columns : sequence of str, optional
        Column layout to encode against. Defaults to the columns of the
        notebook-level frame ``df``, which must exist by the time this
        function is called.

    Returns
    -------
    list
        Feature values for ``columns[1:]``.

    Raises
    ------
    ValueError
        If ``weekday``, ``sport_type`` or one of the numeric feature names is
        not among ``columns``. Such a value cannot be placed in the vector;
        the previous ``DataFrame.append`` implementation silently added an
        extra column for it and returned a vector of the wrong length.
    """
    if columns is None:
        columns = df.columns
    columns = list(columns)

    new_row = {weekday: 1,
               sport_type: 1,
               "Start Time in float": time_to_float(start_time),
               "Duration (min)": duration,
               "Calories Burned": calories_burned}

    unknown = [name for name in new_row if name not in columns]
    if unknown:
        raise ValueError(
            f"no matching feature column for {unknown}; "
            f"available columns: {columns}"
        )

    # DataFrame.append was removed in pandas 2.0, so build the row directly:
    # columns not set above default to 0, and the first column is skipped to
    # mirror the original ``iloc[0][1:]`` slice.
    return [new_row.get(name, 0) for name in columns[1:]]

In [None]:
# Read sport_data.csv and immediately swap its categorical / clock-time
# columns for numeric ones; `df` is the encoded frame the later cells use.
df = handle_categorical(pd.read_csv("sport_data.csv"))
df

Unnamed: 0,ID,Duration (min),Calories Burned,Basketball,Cycling,Running,Swimming,Tennis,Yoga,Friday,Monday,Sunday,Thursday,Tuesday,Wednesday,Start Time in float
0,1,90,800,0,0,0,1,0,0,0,1,0,0,0,0,17.0
1,1,120,1000,1,0,0,0,0,0,0,0,0,0,0,1,20.0
2,2,60,400,0,0,0,0,1,0,1,0,0,0,0,0,10.0
3,2,45,350,0,1,0,0,0,0,0,0,1,0,0,0,6.5
4,3,30,350,0,0,1,0,0,0,0,0,0,0,1,0,6.0
5,3,60,250,0,0,0,0,0,1,0,0,0,1,0,0,19.5
6,4,90,800,0,0,0,1,0,0,0,1,0,0,0,0,17.0
7,4,120,1000,1,0,0,0,0,0,0,0,0,0,0,1,20.0
8,5,60,400,0,0,0,0,1,0,1,0,0,0,0,0,10.0
9,5,45,350,0,1,0,0,0,0,0,0,1,0,0,0,6.5


In [None]:
# Fit the nearest-neighbour index on every column except the first one.
X = df[df.columns[1:]]
# Each query will return its two closest rows.
neigh = NearestNeighbors(n_neighbors=2)
# fit() hands back the estimator itself, so `nbrs` and `neigh` are the same object.
nbrs = neigh.fit(X)

In [None]:
# Encode a single example workout; kneighbors expects a list of feature rows.
query = [
    generate_input(
        weekday="Sunday",
        sport_type="Basketball",
        start_time="16:00",
        duration=120,
        calories_burned=1000,
    )
]
distances, indices = nbrs.kneighbors(query)
print(indices)
# One entry of `indices` per query row: show the matching rows of `df`.
for index in indices:
    print(df.iloc[index])

[[7 1]]
   ID  Duration (min)  Calories Burned  Basketball  Cycling  Running  \
7   4             120             1000           1        0        0   
1   1             120             1000           1        0        0   

   Swimming  Tennis  Yoga  Friday  Monday  Sunday  Thursday  Tuesday  \
7         0       0     0       0       0       0         0        0   
1         0       0     0       0       0       0         0        0   

   Wednesday  Start Time in float  
7          1                 20.0  
1          1                 20.0  
