# Running Analysis
## Garmin data

### importing libraries

In [80]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

### loading data

In [81]:
def load_data(file: str) -> pd.DataFrame:
    """
    param file: the name of the file with the running data
    :return: dataframe with the data
    """
    return pd.read_csv(file)
file = r"running_activities.csv"
data = load_data(file)
data.head()

Unnamed: 0,Activity Type,Date,Distance,Calories,Time,Avg HR,Max HR,Avg Run Cadence,Max Run Cadence,Avg Pace,...,Total Descent,Avg Stride Length,Moving Time,Elapsed Time,Min Elevation,Max Elevation,Effort,Self Feel,Route Index,Place
0,Running,12/09/2022 21:00,7.01,464,0:41:59,0,0,169,192,6:00,...,55,0.99,0:41:54,0:41:59,50,80,9,3,1,Ein Vered
1,Running,08/09/2022 20:02,5.0,329,0:28:19,0,0,168,176,5:39,...,34,1.05,0:28:18,0:28:24,55,80,8,5,2,Ein Vered
2,Running,05/09/2022 20:50,7.01,460,0:40:02,0,0,169,175,5:43,...,57,1.04,0:39:56,0:40:02,50,80,8,5,1,Ein Vered
3,Running,03/09/2022 20:10,7.0,464,0:41:36,0,0,170,182,5:56,...,53,0.99,0:41:34,0:41:36,52,78,7,5,1,Ein Vered
4,Running,01/09/2022 20:42,5.01,331,0:28:57,0,0,167,175,5:47,...,35,1.03,0:28:50,0:28:57,50,79,8,5,3,Ein Vered


### Data pre-proccessing

#### filtering all the five km runs

In [82]:
epsilon_of_five = 0.1
five_km_runs = data.loc[(data["Distance"] >= 5-epsilon_of_five) & (data["Distance"] <= 5+epsilon_of_five)]
five_km_runs = five_km_runs.reset_index(drop=True)
five_km_runs.describe()

Unnamed: 0,Distance,Calories,Avg HR,Max HR,Avg Run Cadence,Max Run Cadence,Avg Stride Length,Route Index
count,88.0,88.0,88.0,88.0,88.0,88.0,88.0,88.0
mean,5.005568,343.090909,122.806818,134.375,166.170455,177.886364,1.047727,8.920455
std,0.011127,16.713706,75.767014,82.805868,2.496994,10.033773,0.065526,4.381837
min,5.0,316.0,0.0,0.0,157.0,169.0,0.7,2.0
25%,5.0,329.0,0.0,0.0,165.0,173.0,1.04,4.75
50%,5.0,341.5,167.0,183.0,167.0,176.0,1.06,10.0
75%,5.01,353.5,171.25,186.0,168.0,178.0,1.08,11.0
max,5.09,396.0,179.0,195.0,170.0,227.0,1.15,18.0


#### changing the data type of column "Date"

In [83]:
five_km_runs["Date"] = pd.to_datetime(five_km_runs["Date"])

#### dividing the runs per season

In [84]:
summer_time = [5, 6, 7, 8, 9, 10]
winter_time = [1, 2, 3, 4, 11, 12]
summer_runs = pd.DataFrame()
winter_runs = pd.DataFrame()
for date in five_km_runs["Date"]:
    if date.month in summer_time:
        summer_runs = pd.concat([summer_runs, five_km_runs[five_km_runs["Date"] == date]], ignore_index=True, axis = 0)
    else:
        winter_runs = pd.concat([winter_runs, five_km_runs[five_km_runs["Date"] == date]], ignore_index=True, axis = 0)

In [90]:
# Summer runs preview
summer_runs = summer_runs.reset_index(drop=True).sort_values(by="Date")
summer_runs.describe()

Unnamed: 0,Distance,Calories,Avg HR,Max HR,Avg Run Cadence,Max Run Cadence,Avg Stride Length,Route Index
count,55.0,55.0,55.0,55.0,55.0,55.0,55.0,55.0
mean,5.006727,343.581818,107.436364,116.781818,165.854545,178.927273,1.039091,8.709091
std,0.013341,18.708971,82.095871,89.120537,2.676497,12.212402,0.07799,4.48319
min,5.0,321.0,0.0,0.0,157.0,169.0,0.7,2.0
25%,5.0,329.0,0.0,0.0,164.0,173.0,1.03,4.0
50%,5.0,336.0,165.0,182.0,167.0,176.0,1.06,10.0
75%,5.01,355.0,171.5,184.5,168.0,179.0,1.08,11.0
max,5.09,396.0,179.0,189.0,170.0,227.0,1.15,18.0


In [88]:
# Summer runs preview
summer_runs = summer_runs.reset_index(drop=True).sort_values(by="Date")
summer_runs.describe()

Unnamed: 0,Activity Type,Date,Distance,Calories,Time,Avg HR,Max HR,Avg Run Cadence,Max Run Cadence,Avg Pace,...,Total Descent,Avg Stride Length,Moving Time,Elapsed Time,Min Elevation,Max Elevation,Effort,Self Feel,Route Index,Place
0,Running,2021-05-10 19:27:00,5.0,360,0:28:28,174,186,162,169,5:41,...,39,1.08,0:28:28,0:28:28,270,301,--,--,11,Beer Sheva
1,Running,2021-07-10 21:25:00,5.0,349,0:29:31,164,180,166,173,5:54,...,27,1.02,0:29:29,0:29:31,56,75,--,--,3,Ein Vered
2,Running,2021-07-12 21:12:00,5.0,323,0:27:54,163,183,170,177,5:35,...,182,1.06,0:27:52,0:28:10,204,294,5,4,15,Traidmill
3,Running,2021-08-19 19:11:00,5.0,360,0:28:19,166,185,164,179,5:40,...,33,1.08,0:28:04,0:28:19,55,75,--,--,18,Ein Vered
4,Running,2021-08-22 20:17:00,5.09,369,0:30:03,164,182,164,173,5:54,...,23,1.03,0:29:38,0:30:03,55,75,--,--,14,Ein Vered
