In [1]:
import csv
import random

def generate_fitness_data(num_rows, seed=None):
    """Generate synthetic fitness records.

    Each record is a list in the order
    ``[age, gender, height, weight, resting_hr, workout_hr, fitness_goal,
    preferred_exercise, suggested_sport]``.

    Args:
        num_rows: Number of records to generate. ``range`` yields nothing for
            zero or negative values, so those produce an empty list.
        seed: Optional value passed to ``random.seed`` before any record is
            drawn. ``None`` (the default) leaves the module-level generator
            untouched, which is the previous behaviour.

    Returns:
        A list containing ``num_rows`` records.
    """
    # The option lists never change between rows, so build them once instead
    # of on every loop iteration.
    genders = ["Male", "Female"]
    fitness_goal_options = [
        "Weight Loss", "Toning", "Muscle Gain", "General Fitness",
        "Endurance Training", "Flexibility", "Balance",
        "Stress Relief", "Improved Sleep", "Rehabilitation",
    ]
    preferred_exercise_options = [
        "Cardio", "Strength Training", "Mixed",
        "Yoga", "Pilates", "HIIT",
        "Swimming", "Running", "Cycling", "Walking",
        "Weightlifting", "Bodybuilding", "Crossfit",
        "Dance", "Martial Arts", "Rock Climbing",
    ]

    if seed is not None:
        # Seeds the module-level generator, which get_suggested_sports also
        # draws from when it picks a sport.
        random.seed(seed)

    data = []
    for _ in range(num_rows):
        # Units are not stated anywhere in the code; the ranges look like
        # cm / kg / beats per minute -- TODO confirm.
        age = random.randint(20, 45)
        gender = random.choice(genders)
        height = random.randint(155, 190)
        weight = random.randint(50, 100)
        resting_hr = random.randint(60, 85)
        workout_hr = random.randint(130, 170)

        fitness_goals = random.choice(fitness_goal_options)
        preferred_exercises = random.choice(preferred_exercise_options)
        suggested_sports = get_suggested_sports(fitness_goals, preferred_exercises)

        data.append([
            age, gender, height, weight, resting_hr, workout_hr,
            fitness_goals, preferred_exercises, suggested_sports,
        ])
    return data

# Sports considered suitable for each fitness goal. List order is the order
# candidates are offered to random.choice.
_SPORTS_BY_GOAL = {
    "Weight Loss": ["Running", "Cycling", "Swimming", "HIIT", "Cross-Training", "Zumba", "Spinning", "Rowing", "Elliptical", "Stair Climbing", "Dance Cardio", "Indoor Cycling", "Walking"],
    "Toning": ["Yoga", "Pilates", "Barre", "TRX", "Kettlebells", "Functional Training", "Hot Yoga", "Restorative Yoga", "Pilates Reformer"],
    "Muscle Gain": ["Weightlifting", "Bodybuilding", "Powerlifting", "Crossfit", "Calisthenics", "Olympic Lifting", "Plyometrics", "Strongman", "Kettlebell Training"],
    "General Fitness": ["Swimming", "Walking", "Dance", "Aerobics", "Elliptical", "Pilates", "Zumba", "Yoga", "Tai Chi"],
    "Endurance Training": ["Running", "Cycling", "Swimming", "Long-distance walking", "Triathlon"],
    "Flexibility": ["Yoga", "Pilates", "Stretching", "Tai Chi", "Gymnastics"],
    "Balance": ["Yoga", "Pilates", "Tai Chi", "Rock Climbing", "Gymnastics", "Ballet"],
    "Stress Relief": ["Yoga", "Pilates", "Tai Chi", "Meditation", "Walking", "Nature walks"],
    "Improved Sleep": ["Yoga", "Pilates", "Light cardio", "Swimming"],
    "Rehabilitation": ["Swimming", "Water aerobics", "Physical therapy exercises", "Yoga (gentle)"],
}

# Sports that match each exercise preference. A preference missing from this
# table (e.g. "Mixed") applies no filter at all.
_SPORTS_BY_EXERCISE = {
    "Cardio": ["Running", "Cycling", "Swimming", "HIIT", "Cross-Training", "Zumba", "Spinning", "Rowing", "Elliptical", "Stair Climbing", "Dance Cardio", "Indoor Cycling", "Walking", "Aerobics"],
    "Strength Training": ["Weightlifting", "Bodybuilding", "Powerlifting", "Crossfit", "Calisthenics", "Olympic Lifting", "TRX", "Kettlebells", "Functional Training", "Barre", "Pilates Reformer", "Strongman", "Bodyweight Training", "Kettlebell Training", "Pilates", "Plyometrics"],
    "Yoga": ["Yoga", "Hot Yoga", "Restorative Yoga"],
    "Pilates": ["Pilates", "Pilates Reformer"],
    "HIIT": ["HIIT", "Crossfit"],
    "Swimming": ["Swimming", "Water aerobics"],
    "Running": ["Running", "Long-distance running"],
    "Cycling": ["Cycling", "Spinning"],
    "Walking": ["Walking", "Long-distance walking"],
    "Weightlifting": ["Weightlifting", "Bodybuilding", "Powerlifting", "Olympic Lifting"],
    "Bodybuilding": ["Bodybuilding"],
    "Crossfit": ["Crossfit"],
    "Dance": ["Dance", "Zumba", "Dance Cardio"],
    "Martial Arts": ["Martial Arts"],
    "Rock Climbing": ["Rock Climbing"],
}


def get_suggested_sports(fitness_goal, preferred_exercise):
    """Pick one sport suggestion for a fitness goal and exercise preference.

    The candidates are the goal's sports that also match the preferred
    exercise. If none overlap, the preference's own sports are used instead.
    A preference with no entry in the lookup table leaves the goal's list
    unfiltered.

    Candidates keep the goal list's order rather than passing through a
    ``set``: string hashing is randomised per interpreter run, so set order
    would make the pick differ between runs even with a seeded generator.
    Each sport is listed once so no candidate is weighted twice by accident.

    Args:
        fitness_goal: Goal name, e.g. ``"Weight Loss"``. Unknown goals
            contribute no candidates.
        preferred_exercise: Preference name, e.g. ``"Cardio"``.

    Returns:
        A sport name chosen with ``random.choice``, or ``"No Suggestion"``
        when there are no candidates.
    """
    goal_sports = _SPORTS_BY_GOAL.get(fitness_goal, [])
    exercise_sports = _SPORTS_BY_EXERCISE.get(preferred_exercise)

    if exercise_sports is None:
        candidates = goal_sports
    else:
        allowed = set(exercise_sports)  # used for membership tests only
        overlap = [sport for sport in goal_sports if sport in allowed]
        candidates = overlap or exercise_sports

    return random.choice(candidates) if candidates else "No Suggestion"


# --- CSV output and data generation ---

def write_to_csv(data, filename="fitness_data.csv"):
    """Write fitness records to a CSV file with a header row.

    Args:
        data: Iterable of rows; each row is an iterable of values in the
            same order as the header columns.
        filename: Path of the file to create. An existing file is
            overwritten.
    """
    header = ["Age", "Gender", "Height", "Weight", "RestingHR", "WorkoutHR", "FitnessGoal", "PreferredExercise", "SuggestedSport"]
    # newline='' lets the csv module control line endings. The encoding is
    # explicit so the file does not depend on the platform's default and
    # matches the UTF-8 default that pandas.read_csv assumes.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        writer.writerows(data)

# Generate and save the synthetic dataset.
RANDOM_SEED = 42  # change (or remove the random.seed call) for a different sample
OUTPUT_FILENAME = "fitness_data.csv"  # single source for both the write and the message
num_rows_to_generate = 50500  # You can change the number of rows

# Start every run from the same generator state.
random.seed(RANDOM_SEED)
fitness_data = generate_fitness_data(num_rows_to_generate)
write_to_csv(fitness_data, OUTPUT_FILENAME)

print(f"Generated {num_rows_to_generate} rows of fitness data and saved to {OUTPUT_FILENAME}")


Generated 50500 rows of fitness data and saved to fitness_data.csv


In [3]:
pip install pandas 

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [1]:

# 1. Load the data
df = pd.read_csv("fitness_data.csv")

# 2. Data Exploration and Visualization

# Basic info
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would add a stray "None" line to the output.
df.info()
print(df.describe())

# Distribution of numerical features
df.hist(figsize=(15, 10))
plt.show()


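# Next step: train a model. X_train / y_train are not defined yet -- first
# encode the categorical columns (LabelEncoder), split the data
# (train_test_split) and scale the numeric features (StandardScaler).
# Example (using a simple Logistic Regression for multi-class classification):
# from sklearn.linear_model import LogisticRegression
# model = LogisticRegression(max_iter=1000)  # Adjust max_iter as needed
# model.fit(X_train, y_train)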

# ... (Model evaluation and further steps would follow)

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject