# Final project: The runners database

### Disclaimer: For this code to run, a file path has to be provided where the files "runners_data.csv" and "run_data.csv" can be saved for further use. These files will contain the information created by this program.

The following code creates a database for runners where they can create their own accounts, record runs and take further actions. All of the information is saved in two CSV files: one holds the data of the individual runners, while the other one holds the data about their runs. I will take you step by step through my project.

At first, we import the relevant libraries for the code to run.

In [12]:
import csv
from datetime import datetime, timedelta
import pandas as pd
import random
import os
from os import write

As already stated, the program is based on information that we save to two CSV files. With the following code snippet, the CSV files are created if they do not already exist.

In [13]:
# Locations of the two CSV files. A bare file name is resolved against the current working directory.
# Example of a local path: runners_data = "C:/Users/Yoshio/OneDrive/runners_data.csv"
runners_data = "runners_data.csv"
# Example of a local path: run_data = "C:/Users/Yoshio/OneDrive/run_data.csv"
run_data = "run_data.csv"


def _create_csv_if_missing(path, header):
    """Write a CSV holding only the header row, unless a file already exists at `path`."""
    if os.path.exists(path):
        return
    with open(path, mode="w", newline="", encoding="utf-8") as handle:
        csv.writer(handle).writerow(header)


# One row per runner
_create_csv_if_missing(runners_data, ["runner_id", "name", "age", "max_hr", "resting_hr"])
# One row per run; vo2_max is only filled for fast runs and calories_burned only for long runs
_create_csv_if_missing(
    run_data,
    ["run_id", "runner_id", "type", "date", "distance", "duration", "average_bpm", "vo2_max", "calories_burned"],
)

Now the first class is created. The Runner class asks for necessary input from the user to gather all the relevant information from the runner and estimates certain metrics like the maximum heart rate or resting heart rate which are used in further calculation when it comes to the runs.

In [14]:
# At first, the class Runner is initialized
class Runner():
    """A runner account that is persisted as one row of the CSV file `runners_data`.

    The instance keeps id, name, age and the two estimated heart rates in
    private attributes. The static methods work directly on the CSV file and
    need no instance.
    """

    def __init__(self, id, name = None, age = None, max_hr = None, resting_hr = None):
        # The parameter name `id` shadows the builtin; it is kept so existing keyword callers keep working
        self.__id = id
        self.__name = name
        self.__age = age
        self.__max_hr = max_hr
        self.__resting_hr = resting_hr

    @staticmethod
    def _read_runners():
        """Load `runners_data` with the runner ids kept as strings.

        Without the explicit dtype pandas infers integers for the id column,
        which drops leading zeros ("0123" becomes 123), so such an id could
        never be matched against the user's input again.
        """
        return pd.read_csv(runners_data, dtype={"runner_id": str})

    # A user can either create an account or log into an existing one, so the id is asked for first
    @staticmethod
    def ask_for_user_id():
        """Prompt until the user enters a 4-digit id and return it as a string."""
        while True:
            user_id = input("\nPlease enter your user id: ")
            if not user_id.isdigit():
                problem = "\nUser id must only contain digits."
            elif len(user_id) != 4:
                problem = "\nUser id must be 4 digits long."
            else:
                return user_id
            print(f"\nInvalid input: {problem}")

    @staticmethod
    def check_if_user_exists(user_id):
        """Return True when `user_id` is already stored in `runners_data`, else False."""
        stored_ids = set(Runner._read_runners()["runner_id"])
        return str(user_id) in stored_ids

    def create_name(self):
        """Prompt until a purely alphabetic first name is entered; store and return it capitalized."""
        while True:
            self.__name = str(input("\nPlease enter your first name: ")).capitalize()
            if self.__name.isalpha():
                return self.__name
            print("\nInvalid input: \nName must only contain letters.")

    def create_age(self):
        """Prompt until a positive integer age is entered; store and return it."""
        while True:
            try:
                age = int(input("\nPlease enter your age: "))
            except ValueError:
                # Non-numeric text: fall through to the same message as a non-positive number
                age = 0
            if age > 0:
                self.__age = age
                return self.__age
            print("\nInvalid input: \nAge must be a positive integer.")

    # The fitness level is needed to adjust the estimated heart rates
    @staticmethod
    def find_fitness_level():
        """Prompt until the user enters low, average or high and return it in lower case."""
        while True:
            fitness_level = input("\nPlease provide your fitness level: Low, average, or high: ").strip().lower()
            if fitness_level in ["low", "average", "high"]:
                return fitness_level
            print("\nInvalid input: Fitness level must be either low, average, or high.")

    def calculate_max_hr(self, fitness_level):
        """Estimate, store and return the maximum heart rate.

        The baseline is the Tanaka formula (208 - 0.7 * age); it is lowered by
        5 for "low" and raised by 5 for "high" fitness. Any other value leaves
        the baseline unchanged. The age must have been set beforehand.
        """
        baseline_hr = 208 - 0.7 * self.__age
        adjustment = {"low": -5, "high": 5}.get(fitness_level, 0)
        self.__max_hr = baseline_hr + adjustment
        return self.__max_hr

    def calculate_resting_hr(self, fitness_level):
        """Estimate, store and return the resting heart rate.

        Computed as 60 + fitness adjustment (+20 low, 0 average, -20 high)
        + 0.2 * age. Raises KeyError for an unknown fitness level.
        """
        baseline_value = 60
        fitness_adjustment = {"low" : 20, "average" : 0, "high" : -20}
        age_adjustment = self.__age * 0.2
        self.__resting_hr = baseline_value + fitness_adjustment[fitness_level] + age_adjustment
        return self.__resting_hr

    def save_information(self):
        """Append this runner as one row to `runners_data` and print a confirmation."""
        # Every value is wrapped in a list so pandas builds a one-row DataFrame.
        # The keys mirror the CSV header; they are not written because header=False.
        runner = {
            "runner_id" : [self.__id],
            "name" : [self.__name],
            "age" : [self.__age],
            "max_hr" : [self.__max_hr],
            "resting_hr" : [self.__resting_hr]
        }
        df = pd.DataFrame(runner)
        df.to_csv(runners_data, mode = "a", header = False, index = False)
        print(f"\nRunner {self.__name} with id {self.__id}, age of {self.__age}, maximum heart rate of {self.__max_hr} and resting heart rate of {self.__resting_hr} got saved.")

    @staticmethod
    def retrieve_information_from_csv(user_id):
        """Return the stored name of the runner with `user_id`.

        Raises IndexError when no such runner is stored.
        """
        df = Runner._read_runners()
        user_row = df[df["runner_id"] == str(user_id)]
        if user_row.empty:
            raise IndexError(f"No runner with id {user_id} found in {runners_data}.")
        return user_row.iloc[0]["name"]

    def generate_random_runners(self, num_runners):
        """Append `num_runners` randomly generated runners to `runners_data`.

        Ids are drawn from the 4-digit range 1000-9999 and never collide with
        an id that is already stored or with each other. Raises ValueError
        when fewer than `num_runners` unused ids are left.
        """
        # ChatGPT created 50 random names for me
        first_names = [
            "Ella", "William", "Nora", "David", "Aurora", "Mason", "Luna", "Zoe", "Owen", "Scarlett",
            "James", "Abigail", "Michael", "Avery", "Aiden", "Gabriel", "Carter", "Harper", "Sophia", "Lucas",
            "John", "Sebastian", "Aria", "Benjamin", "Emma", "Laura", "Chloe", "Lily", "Matthew", "Leo",
            "Mia", "Isaac", "Oliver", "Sarah", "Elijah", "Evelyn", "Jacob", "Violet", "Emily", "Charlotte",
            "Daniel", "Jackson", "Samuel", "Isabella", "Joseph", "Hazel", "Amelia", "Henry", "Anna", "Alexander"
        ]
        fitness_levels = ["low", "average", "high"]
        taken_ids = set(Runner._read_runners()["runner_id"])
        free_ids = [str(number) for number in range(1000, 10000) if str(number) not in taken_ids]
        if num_runners > len(free_ids):
            raise ValueError(f"Only {len(free_ids)} unused runner ids are left, cannot create {num_runners}.")
        # Sampling without replacement guarantees that the new ids are unique among themselves
        for user_id in random.sample(free_ids, num_runners):
            runner = Runner(user_id, name = random.choice(first_names), age = random.randint(18, 65))
            # The fitness level is only needed to estimate the two heart rates
            random_fitness_level = random.choice(fitness_levels)
            runner.calculate_max_hr(random_fitness_level)
            runner.calculate_resting_hr(random_fitness_level)
            runner.save_information()
    

The random runners are created at this point, so that the command-line interface later on stays clean. They can be viewed in the CSV file "runners_data".

In [None]:
# Seed the CSV file runners_data with 25 randomly generated runners.
# The instance only serves to call the method, so it is created without an id.
# NOTE: every execution of this cell appends another 25 rows to the file.
runner = Runner(None)
runner.generate_random_runners(25)

The next class to be defined is the Run class. The Run class includes all the functionalities that are tied to runs, like asking for their details or creating their ids.

In [16]:
class Run():
    """Common data and input prompts shared by all run types.

    A run belongs to one runner and carries type, date (DD-MM-YYYY string),
    distance, duration and average heart rate. The `get_*` methods prompt the
    user on the command line until a valid value has been entered.
    """

    def __init__(self, runner_id, run_type = None, date = None, distance = None, duration = None, average_bpm = None):
        self.runner_id = runner_id
        self.run_type = run_type
        self.date = date
        self.distance = distance
        self.duration = duration
        self.average_bpm = average_bpm
        self.run_id = None

    @staticmethod
    def _prompt_positive(prompt, convert, error_message):
        """Prompt until `convert(input)` yields a finite number greater than zero and return it."""
        while True:
            try:
                value = convert(input(prompt))
            except ValueError:
                value = None
            # The chained comparison also rejects "nan" and "inf", which float() parses successfully
            if value is not None and 0 < value < float("inf"):
                return value
            print(f"Invalid input: {error_message}")

    def get_run_type(self):
        """Prompt until the user answers fast or long and return the answer in lower case."""
        while True:
            run_type = str(input("\nDid you go on a fast or long run? ")).lower()
            if run_type in ["fast", "long"]:
                return run_type
            print("Invalid input: \nInvalid run type, please enter either fast or long.")

    def get_date(self):
        """Prompt for the date of the run, store it as a DD-MM-YYYY string and return it.

        An empty answer selects today's date.
        """
        while True:
            date_input = input("\nPlease enter the date (DD-MM-YYYY) of your run or leave empty if it was today: ")
            try:
                # Parsing and re-formatting validates the input and normalizes it (e.g. zero padding)
                parsed = datetime.today() if date_input == "" else datetime.strptime(date_input, "%d-%m-%Y")
            except ValueError:
                print("\nInvalid format: Please enter a date in format DD-MM-YYYY or keep blank to set date to today.")
                continue
            self.date = parsed.strftime("%d-%m-%Y")
            return self.date

    def get_distance(self):
        """Prompt for the distance in kilometers, store it as a float and return it."""
        self.distance = Run._prompt_positive(
            "\nPlease enter the distance of your run in kilometers either with decimals or without: ",
            float,
            "\nDistance must be a positive number.",
        )
        return self.distance

    def get_duration(self):
        """Prompt for the duration in minutes, store it as a float and return it."""
        self.duration = Run._prompt_positive(
            "\nHow long have you been running in minutes? ",
            float,
            "\nDuration must be more than 0 minutes.",
        )
        return self.duration

    def get_bpm(self):
        """Prompt for the average heart rate, store it as an int and return it."""
        self.average_bpm = Run._prompt_positive(
            "\nWhat was your average heart rate? ",
            int,
            "\nAverage heart rate must be positive.",
        )
        return self.average_bpm

    # A run id consists of the user id, an "R" and the running number of the run of that user
    @staticmethod
    def retrieve_run_id(user_id = None):
        """Return the highest run number stored for `user_id`, or 0 when there is none.

        `user_id` is optional only for backward compatibility: when it is
        omitted, the module-level variable `user_id` is used, which is what
        this method silently relied on before. Raises ValueError when neither
        is available.
        """
        if user_id is None:
            user_id = globals().get("user_id")
        if user_id is None:
            raise ValueError("A user id is required to look up the last run id.")
        # Ids are read as strings so that leading zeros survive the round trip through the CSV
        df = pd.read_csv(run_data, dtype={"run_id": str, "runner_id": str})
        correct_runner = df[df["runner_id"] == str(user_id)]
        if correct_runner.empty:
            return 0
        # Only the part after the "R" is the running number of the run
        return max(int(run_id.split("R")[1]) for run_id in correct_runner["run_id"])

    def generate_run_id(self, user_id, last_run_number):
        """Build the next run id ("<user_id>R<last_run_number + 1>"), store it and return it."""
        self.run_id = f"{user_id}R{last_run_number + 1}"
        return self.run_id

# As stated before, the user can choose between two run types to go on
# The first one of these is the fast run that gets initialized here
class FastRun(Run):
    """A run of type "Fast run" whose additional metric is an estimated VO2 max."""

    def __init__(self, runner_id, run_id, date = None, distance = None, duration = None, average_bpm = None, vo2_max = None):
        # The common attributes are handled by Run; the run type is fixed for this class
        super().__init__(runner_id, "Fast run", date, distance, duration, average_bpm)
        self.run_id = run_id
        self.vo2_max = vo2_max

    @staticmethod
    def _runner_value(user_id, column):
        """Return the value of `column` for the runner with `user_id` from `runners_data`.

        The id column is read as strings so that ids with leading zeros can be
        matched. Raises IndexError when no such runner is stored.
        """
        df = pd.read_csv(runners_data, dtype={"runner_id": str})
        user_row = df[df["runner_id"] == str(user_id)]
        if user_row.empty:
            raise IndexError(f"No runner with id {user_id} found in {runners_data}.")
        return user_row.iloc[0][column]

    # Both heart rates of the runner are needed to estimate the VO2 max
    @staticmethod
    def retrieve_max_hr(user_id):
        """Return the stored maximum heart rate of the runner with `user_id`."""
        return FastRun._runner_value(user_id, "max_hr")

    @staticmethod
    def retrieve_resting_hr(user_id):
        """Return the stored resting heart rate of the runner with `user_id`."""
        return FastRun._runner_value(user_id, "resting_hr")

    def estimation_vo2_max(self, max_hr, resting_hr, distance, duration):
        """Estimate the VO2 max of this run, store it in `self.vo2_max` and return it.

        The speed per hour (distance / duration * 60) is multiplied by 3.5 and
        set in relation to a heart-rate based value (max_hr / resting_hr * 15.3);
        half of that ratio is the result. Presumably `distance` is in kilometers
        and `duration` in minutes, as asked for by the prompts of Run.
        Raises ZeroDivisionError when `duration` or `resting_hr` is zero.
        """
        minutes_pace = distance / duration
        hourly_pace = minutes_pace * 60
        performance_vo2_max = hourly_pace * 3.5
        base_vo2_max = (max_hr / resting_hr) * 15.3
        self.vo2_max = (performance_vo2_max / base_vo2_max) / 2
        return self.vo2_max

    def save_fast_run(self):
        """Print the run and append it as one row to `run_data`."""
        print(f"\nSaving FastRun: run_id={self.run_id}, runner_id={self.runner_id}, date={self.date}, distance={self.distance}, duration={self.duration}, average_bpm={self.average_bpm}, vo2_max={self.vo2_max}")
        # Every value is wrapped in a list so pandas builds a one-row DataFrame
        run = {
            "run_id": [self.run_id],
            "runner_id": [self.runner_id],
            "type": [self.run_type],
            "date": [self.date],
            "distance": [self.distance],
            "duration": [self.duration],
            "average_bpm": [self.average_bpm],
            "vo2_max": [self.vo2_max],
            # Calories are only estimated for long runs, so the column stays empty here
            "calories_burned" : [None]
        }
        df = pd.DataFrame(run)
        df.to_csv(run_data, mode = "a", header = False, index = False)

# The second type of run is the long run that gets initialized here
class LongRun(Run):
    """A run of type "Long run" whose additional metric is the estimated calories burned."""

    def __init__(self, runner_id, run_id, distance=None, date=None, duration=None, average_bpm=None, calories_burned=None):
        # Run stores the shared attributes; the run type is fixed for this class
        super().__init__(
            runner_id,
            run_type="Long run",
            date=date,
            distance=distance,
            duration=duration,
            average_bpm=average_bpm,
        )
        self.run_id = run_id
        self.calories_burned = calories_burned

    def estimation_calories_burned(self):
        """Estimate the calories burned as distance * 70, store the value and return it.

        Raises ValueError when the distance has not been set.
        """
        if self.distance is not None:
            typical_burn_rate = 70
            self.calories_burned = self.distance * typical_burn_rate
            return self.calories_burned
        raise ValueError("\nDistance must be set before calculating calories burned.")

    def save_long_run(self):
        """Print the run and append it as one row to `run_data`."""
        print(f"\nSaving LongRun: run_id={self.run_id}, runner_id={self.runner_id}, date={self.date}, distance={self.distance}, duration={self.duration}, average_bpm={self.average_bpm}, calories_burned={self.calories_burned}")
        # Column order follows the header of run_data; vo2_max stays empty for long runs
        values_by_column = {
            "run_id": self.run_id,
            "runner_id": self.runner_id,
            "type": self.run_type,
            "date": self.date,
            "distance": self.distance,
            "duration": self.duration,
            "average_bpm": self.average_bpm,
            "vo2_max": None,
            "calories_burned": self.calories_burned,
        }
        # Wrapping each value in a list yields a DataFrame with exactly one row
        single_row = pd.DataFrame({column: [value] for column, value in values_by_column.items()})
        single_row.to_csv(run_data, mode = "a", header = False, index = False)



In addition to the random runners, I also wanted to add some random runs. As you can see in the following, these random runs depend on Run, FastRun and LongRun. Therefore, they cannot be generated inside FastRun or LongRun but only after both derived classes have been defined, so the random generation of runs is an independent function.

In [17]:
# All of the funcionalities of the Run class are done, to fill the CSV with some runs already, I wanted to create random runs
def generate_random_runs():
    """Append between 1 and 10 random runs for every runner stored in `runners_data`.

    Each run is randomly a fast or a long run with a date within the last
    year, a distance of 1 to 42 kilometers and a duration that depends on the
    distance. Run ids continue after the highest run number already stored for
    the runner, so calling the function again never reuses an id.
    """
    # Ids are read as strings so that leading zeros survive the round trip through the CSV
    runners = pd.read_csv(runners_data, dtype={"runner_id": str})
    stored_runs = pd.read_csv(run_data, dtype={"run_id": str, "runner_id": str})
    stored_runs = stored_runs.dropna(subset=["run_id", "runner_id"])

    # Highest run number that is already in use per runner
    last_run_number = {}
    for stored_run_id, stored_runner_id in zip(stored_runs["run_id"], stored_runs["runner_id"]):
        number = int(stored_run_id.split("R")[1])
        last_run_number[stored_runner_id] = max(last_run_number.get(stored_runner_id, 0), number)

    for _, row in runners.iterrows():
        user_id = row["runner_id"]
        # In this example, no user gets more than 10 new runs
        for _ in range(random.randint(1, 10)):
            next_number = last_run_number.get(user_id, 0) + 1
            last_run_number[user_id] = next_number
            run_id = f"{user_id}R{next_number}"
            # Only dates within the last year are generated
            date = (datetime.now() - timedelta(days=random.randint(0, 365))).strftime('%d-%m-%Y')
            distance = random.randint(1, 42)
            if random.choice(["fast", "long"]) == "fast":
                # Fast runs take 4 to 5 minutes per kilometer
                duration = random.randint(4, 5) * distance
                fast_run = FastRun(runner_id = user_id, run_id = run_id, date = date, distance = distance,
                                   duration = duration, average_bpm = random.randint(160, 200))
                # The heart rates come from the runner's own row, and the VO2 max is based on
                # the very duration that gets saved with the run
                fast_run.estimation_vo2_max(row["max_hr"], row["resting_hr"], distance, duration)
                fast_run.save_fast_run()
            else:
                # Long runs take 5 to 7 minutes per kilometer
                duration = random.randint(5, 7) * distance
                long_run = LongRun(runner_id = user_id, run_id = run_id, date = date, distance = distance,
                                   duration = duration, average_bpm = random.randint(140, 160))
                long_run.estimation_calories_burned()
                long_run.save_long_run()

The runs are created here to keep the command-line interface clean. They can be viewed in the CSV file "run_data".

In [None]:
# Fill the CSV file run_data with random runs for every runner stored in runners_data.
generate_random_runs()

After creating the needed classes for this program to make sense and generating runners and corresponding runs, a couple of functionalities are being coded:

- Binary search to find the date of a certain run.
- Merge sort to sort all the runs of a certain user by their distance, in descending order.

The functionalities get implemented now to keep the code cleaner and not overload the classes too much. 

###### Additional note: Even though these methods might not make the most sense here, they got implemented to include more content from the syllabus.

In [19]:
# To be able to run the binary search and find the corresponding date, we need to find the run id in the CSV "run_data" first
def get_wanted_run_id():
    """Ask the user for a run id and return it when it is stored in `run_data`.

    Surrounding whitespace of the input is ignored. Returns -1 when no run
    with the entered id exists.
    """
    # The ids are read as strings so they can be compared with the raw input
    known_run_ids = set(pd.read_csv(run_data, dtype={"run_id": str})["run_id"])
    run_id = input("\nEnter the run id to find the corresponding date: ").strip()
    return run_id if run_id in known_run_ids else -1

def binary_search(run_id):
    """Look up the date of the run with `run_id` in `run_data` using a binary search.

    Returns the stored date, or the text "Date could not be found." when no
    run has this id.
    """
    # A binary search needs ordered data, so the runs are sorted by their id first
    runs = pd.read_csv(run_data).sort_values(by="run_id").reset_index(drop=True)
    low, high = 0, len(runs) - 1
    while low <= high:
        probe = (low + high) // 2
        candidate = str(runs.iloc[probe]["run_id"])
        if candidate == run_id:
            return runs.iloc[probe]["date"]
        if candidate > run_id:
            # The wanted id sorts before the probed one: continue in the lower half
            high = probe - 1
        else:
            low = probe + 1
    return "Date could not be found."

In [20]:
# The merge sort is implemented by reading the data set, filtering it by the user id and then merge sorting the filtered rows
def process_and_sort_runs(runner_id):
    """Return the runs of `runner_id` sorted by distance, longest run first.

    The result is a DataFrame without the column "runner_id" and with a fresh
    0-based index. When the runner has no runs, the text "No runs yet." is
    returned instead.
    """
    # Runner ids are read as strings so that ids with leading zeros can be matched
    df = pd.read_csv(run_data, dtype={"runner_id": str})
    filtered_df = df[df["runner_id"] == str(runner_id)]
    if filtered_df.empty:
        return "No runs yet."

    def merge_sort(records):
        """Sort a list of row dictionaries by "distance" in descending order."""
        if len(records) <= 1:
            return records
        mid = len(records) // 2
        left_half = merge_sort(records[:mid])
        right_half = merge_sort(records[mid:])
        merged = []
        i = j = 0
        while i < len(left_half) and j < len(right_half):
            # ">=" sorts descending and keeps runs of equal distance in their stored order
            if left_half[i]["distance"] >= right_half[j]["distance"]:
                merged.append(left_half[i])
                i += 1
            else:
                merged.append(right_half[j])
                j += 1
        # One half is exhausted; whatever is left in the other one is already sorted
        merged.extend(left_half[i:])
        merged.extend(right_half[j:])
        return merged

    # Sorting plain row dictionaries avoids rebuilding a DataFrame on every recursion level
    sorted_records = merge_sort(filtered_df.to_dict(orient="records"))
    sorted_filtered_df = pd.DataFrame(sorted_records, columns=filtered_df.columns)
    return sorted_filtered_df.drop(columns=["runner_id"])


All the functionalities are coded. What now follows is the command-line interface where the user is prompted for input.

In [21]:
def _record_new_run(user_id):
    """Ask the user for the details of one run, compute its metric and save it to the CSV."""
    run = Run(user_id)
    run_type = run.get_run_type()
    run.get_date()
    distance = run.get_distance()
    duration = run.get_duration()
    run.get_bpm()
    # NOTE(review): retrieve_run_id is called without arguments, so the id it looks up is the
    # module-level `user_id`; this equals the parameter when the interface is started from
    # the notebook's final cell
    last_run_number = run.retrieve_run_id()
    run_id = run.generate_run_id(user_id, last_run_number)
    if run_type == "fast":
        fast_run = FastRun(runner_id = user_id, run_id = run_id, date = run.date, distance = run.distance,
                           duration = run.duration, average_bpm = run.average_bpm)
        max_hr = fast_run.retrieve_max_hr(user_id)
        resting_hr = fast_run.retrieve_resting_hr(user_id)
        fast_run.vo2_max = fast_run.estimation_vo2_max(max_hr, resting_hr, distance, duration)
        fast_run.save_fast_run()
    elif run_type == "long":
        long_run = LongRun(runner_id = user_id, run_id = run_id, date = run.date, distance = run.distance,
                           duration = run.duration, average_bpm = run.average_bpm)
        long_run.calories_burned = long_run.estimation_calories_burned()
        long_run.save_long_run()


def runner_interface(user_id):
    """Run the command-line menu for the runner with `user_id` until option 5 is chosen.

    Options: 1 add a run, 2 delete a run (not implemented yet), 3 look up the
    date of a run, 4 show the runs sorted by distance, 5 exit.
    """
    while True:
        print("\nWhat would you like to do?")
        print("1. Add a new run.")
        print("2. Delete a run.")
        print("3. Find date for a certain run.")
        print("4. Sort runs by distance.")
        print("5. Save and exit.")
        try:
            choice = int(input("\nEnter your choice: "))
        except ValueError:
            choice = None
        # Text that is not a number and numbers outside 1-5 get the same answer
        if choice not in range(1, 6):
            print("\nInvalid input: Please enter a number between 1 and 5.")
            continue
        if choice == 1:
            _record_new_run(user_id)
        elif choice == 2:
            print("\nSorry, this feature is not ready yet.")
        elif choice == 3:
            run_id = get_wanted_run_id()
            if run_id == -1:
                print("\nInvalid run id. Return to command-line interface.")
            else:
                date = binary_search(run_id)
                print(f"\nThe date of run {run_id} is {date}.")
        elif choice == 4:
            sorted_runs = process_and_sort_runs(user_id)
            print(f"\n{sorted_runs}")
        else:
            # Only 5 is left: every run was written when it was added, so there is nothing to flush
            print("\nProgress saved, see you soon!")
            break


Finally, the program can run.

In [None]:
# Identify the user and find out whether an account with this id is already stored
user_id = Runner.ask_for_user_id()
user_existence = Runner.check_if_user_exists(user_id)
runner = Runner(user_id)

if user_existence:
    # Known user: log in and greet with the stored name
    print(f"\nWelcome back, {runner.retrieve_information_from_csv(user_id)}")
else:
    # New user: collect the profile, estimate both heart rates and save the account
    runner.create_name()
    runner.create_age()
    fitness_level = runner.find_fitness_level()
    runner.calculate_max_hr(fitness_level)
    runner.calculate_resting_hr(fitness_level)
    runner.save_information()

# Both paths continue with the command-line menu
runner_interface(user_id)