<a href="https://colab.research.google.com/github/wajdi404/09_Python_NumPy_Module/blob/main/TinyML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import zipfile
import os

# URL of the dataset
url = 'https://zenodo.org/records/4686379/files/iotj_dataset_zenodo.zip?download=1'

# Define the local filename to save the dataset
local_filename = 'iotj_dataset_zenodo.zip'

# Download the file from the URL
response = requests.get(url, stream=True)

# Save the file locally
with open(local_filename, 'wb') as file:
    for chunk in response.iter_content(chunk_size=128):
        file.write(chunk)

print(f"Dataset downloaded and saved as {local_filename}")

Dataset downloaded and saved as iotj_dataset_zenodo.zip
Dataset extracted to 'iotj_dataset' folder


['iotj_dataset_zenodo']

In [None]:
local_filename = '.zip'
# Unzip the downloaded file
with zipfile.ZipFile(local_filename, 'r') as zip_ref:
    zip_ref.extractall("iotj_dataset")
    print("Dataset extracted to 'iotj_dataset' folder")

In [None]:
# Check the contents of the folder
os.listdir("/content/iotj_dataset/iotj_dataset_zenodo")

['photos', 'localization', 'ranging', 'readme.md']

In [None]:
def print_directory_tree(root_dir, indent=""):
  # Print the current directory
    print(f"{indent}|-- {os.path.basename(root_dir)}/")

    # Get the list of directories in the current directory
    items = os.listdir(root_dir)
    dirs = [item for item in items if os.path.isdir(os.path.join(root_dir, item))]

    # Recursively print the directories
    for d in dirs:
        print_directory_tree(os.path.join(root_dir, d), indent + "    ")


# Print the directory structure for the "iotj_dataset"
print_directory_tree("/content/iotj_dataset/iotj_dataset_zenodo/ranging")

|-- ranging/
    |-- code/
    |-- data/
        |-- 3db/
            |-- 3db_rec_16/
            |-- 3db_rec_3/
                |-- backup/
            |-- 3db_rec_11/
            |-- 3db_rec_1/
            |-- 3db_rec_6/
            |-- 3db_rec_9/
            |-- 3db_rec_4/
            |-- 3db_rec_13/
            |-- 3db_rec_12/
            |-- 3db_rec_17/
            |-- 3db_rec_10/
            |-- 3db_rec_15/
            |-- 3db_rec_7/
            |-- 3db_rec_5/
            |-- backup/
                |-- 3db_rec_16/
                |-- 3db_rec_3/
                    |-- backup/
                |-- 3db_rec_11/
                |-- 3db_rec_1/
                |-- 3db_rec_6/
                |-- 3db_rec_19/
                |-- 3db_rec_9/
                |-- 3db_rec_4/
                |-- 3db_rec_13/
                |-- 3db_rec_12/
                |-- 3db_rec_17/
                |-- 3db_rec_10/
                |-- 3db_rec_15/
                |-- 3db_rec_7/
                |-- 3db_rec_5/


In [27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the CSV files


dw_rec_1_pd = pd.read_csv( "/content/iotj_dataset/iotj_dataset_zenodo/ranging/data/decawave/dw_rec_1/data.csv")
dw_rec_2_pd = pd.read_csv( "/content/iotj_dataset/iotj_dataset_zenodo/ranging/data/decawave/dw_rec_2/data.csv")
dw_rec_3_pd = pd.read_csv( "/content/iotj_dataset/iotj_dataset_zenodo/ranging/data/decawave/dw_rec_3/data.csv")
dw_rec_4_pd = pd.read_csv( "/content/iotj_dataset/iotj_dataset_zenodo/ranging/data/decawave/dw_rec_4/data.csv")

# Extract 'true_dist' and 'measured_dist' columns from each DataFrame
dw_rec_1_subset = dw_rec_1_pd[['true_dist', 'measured_dist']]
dw_rec_2_subset = dw_rec_2_pd[['true_dist', 'measured_dist']]
dw_rec_3_subset = dw_rec_3_pd[['true_dist', 'measured_dist']]
dw_rec_4_subset = dw_rec_4_pd[['true_dist', 'measured_dist']]

# Concatenate the extracted columns from all four files into one DataFrame
combined_df = pd.concat([dw_rec_1_subset, dw_rec_2_subset, dw_rec_3_subset, dw_rec_4_subset], ignore_index=True)

In [28]:
combined_df

Unnamed: 0,true_dist,measured_dist
0,1,0.953603
1,1,1.004472
2,1,0.943430
3,1,0.943430
4,1,0.963777
...,...,...
5195,3,3.491634
5196,3,3.441934
5197,3,3.511514
5198,3,3.451874


In [29]:
# Shuffle the combined DataFrame
shuffled_df = combined_df.sample(frac=1).reset_index(drop=True)

In [31]:
shuffled_df

Unnamed: 0,true_dist,measured_dist
0,3,2.964814
1,2,1.909937
2,3,2.964814
3,2,2.357582
4,5,4.773893
...,...,...
5195,5,5.948919
5196,2,2.318715
5197,2,2.378355
5198,2,2.030455


### **Random Forest Regressor**

In [36]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib

X = shuffled_df[['measured_dist']]
y = shuffled_df['true_dist']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [38]:
# Make predictions and evaluate the model
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Calculate the mean of the true distances
mean_true_distance = np.mean(y_test)

# Convert MAE and MSE to percentages
mae_percentage = (mae / mean_true_distance) * 100
mse_percentage = (mse / mean_true_distance) * 100

# Print results
print(f'Mean Absolute Error (as a percentage): {mae_percentage:.2f}%')
print(f'Mean Squared Error (as a percentage): {mse_percentage:.2f}%')

Mean Absolute Error (as a percentage): 0.09%
Mean Squared Error (as a percentage): 0.06%


### **Random Forest Regressor**