In [None]:
# Detect the runtime: the "google.colab" module can only be imported when running on Google Colab
try:
    from google.colab import files
except ModuleNotFoundError:
    # The Colab module is missing, hence the notebook is running locally
    RUNNING_IN_COLAB = False
    print("Running locally.")
else:
    # The Colab module was imported, hence the notebook is running on Google Colab
    RUNNING_IN_COLAB = True
    print("Running on Google Colab.")

In [None]:
# Clone the git repository from "https://github.com/valeriodiste/macc-project" (for the source files)
!git clone https://github.com/valeriodiste/macc-project.git

In [None]:
# Change the working directory to the cloned repository
%cd /content/macc_project
# Pull the latest changes from the repository
!git pull origin main
# Change the working directory to the parent directory
%cd ..

In [None]:
# Install the required packages
%%capture
%pip install 'pytorch-lightning<=2.0.9'
%pip install wandb

In [5]:
# Standard library import needed by this cell (this cell runs before the main imports cell)
import os

# General parameters
REFRESH_DATA = True		# Passed to "get_model_data": if True, the text files are parsed again instead of using the JSON cache
# W&B (Weights & Biases) API key, read from the "WANDB_API_KEY" environment variable so that the
# secret is never stored in the notebook (if the variable is unset, the key is an empty string)
WANDB_API_KEY = os.environ.get("WANDB_API_KEY", "")

# Model data parameters
NORMALIZE_SENSOR_DATA = True
NORMALIZATION_RANGE = [-1, 1]
MAX_WEIGHT = 2000
MEASUREMENTS = [
	"accelerometer",
	# "deviceTemperature",	# Not collected, hence always equal to 0
	"gravity",
	"gyroscope",
	"linearAcceleration",
	# "orientation",		# Not collected, hence always equal to <0, 0, 0>
	# "pressure",				# Some devices may not have this sensor, may be always equal to 0
	"rotationVector",
	# "ambientTemperature"	# Some devices may not have this sensor, may be always equal to 0
]

# Model parameters
FNN_HIDDEN_DIM = 1024
FNN_HIDDEN_LAYERS = 3
FNN_ACTIVATION = "LeakyReLU"
FNN_DROPOUT = 0.5
FNN_OPTIMIZER = "AdamW"
FNN_LR = 0.0000001

# Model training parameters
DATA_SPLIT = [0.9, 0.075, 0.025]	# Train, validation, test
BATCH_SIZE = 16
MAX_EPOCHS = 750

# Paths of the raw measurement files, of the saved model and of the cached JSON data
# (they differ between a local run and a Google Colab run)
if not RUNNING_IN_COLAB:
	measurements_dir = "./measurements"
	model_path = "./fnn.pth"
	data_dir = "./data"
else:
	measurements_dir = "/content/macc-project/ServerModel/measurements"
	model_path = "/content/fnn.pth"
	data_dir = "/content/data"

In [None]:
# General libraries
import os
import random
import json
import math
# Logger libraries
import wandb
from wandb.sdk import wandb_run
import logging
from pytorch_lightning.loggers import WandbLogger
# Torch libraries
import torch 
from torch import nn
import pytorch_lightning as pl 
# Import the fnn module
if not RUNNING_IN_COLAB:
	from fnn import FNN, ModelData
else:
	# Import from "macc-project.ServerModel.fnn" (for the source files): the package name contains a dash, not an underscore, so a plain "import" statement cannot be used
    fnn = __import__("macc-project.ServerModel.fnn", fromlist=["fnn"])
    FNN = fnn.FNN
    ModelData = fnn.ModelData
# Import the tqdm library (for the progress bars)
if not RUNNING_IN_COLAB:
    from tqdm import tqdm
else:
    from tqdm.notebook import tqdm

In [7]:

def _data_file_sort_key(file_name):
	"""Returns a sort key ordering "<prefix>_<number>.<ext>" file names by prefix and then numerically by number."""
	stem = os.path.splitext(file_name)[0]
	prefix, _, suffix = stem.rpartition("_")
	if suffix.isdecimal():
		# Numeric suffix: compare it as a number, so that "calibration_2" comes before "calibration_10"
		return (prefix, int(suffix), file_name)
	# No numeric suffix: fall back to a plain alphabetical ordering
	return (stem, -1, file_name)


def _parse_sensor_line(line):
	"""Parses one line of sensor readings (";"-separated fields, vector components separated by ",") into a dictionary."""
	# Split the line only once, instead of once per field
	fields = line.split(";")

	def as_vector(field):
		return tuple(map(float, field.split(",")))

	return {
		"accelerometer": as_vector(fields[0]),
		"deviceTemperature": float(fields[1]),
		"gravity": as_vector(fields[2]),
		"gyroscope": as_vector(fields[3]),
		"linearAcceleration": as_vector(fields[4]),
		"orientation": as_vector(fields[5]),
		"pressure": float(fields[6]),
		"rotationVector": as_vector(fields[7]),
		"ambientTemperature": float(fields[8]),
	}


def _read_data_file(file_path):
	"""Reads one calibration/measurements text file and returns its data object (a dictionary)."""
	data_object = {"sensor_datas": []}
	with open(file_path, "r") as f:
		for i, line in enumerate(f):
			# Skip empty and whitespace-only lines (e.g. "\n", "  \n", "\r\n" or a trailing tab)
			if not line.strip():
				continue
			if i == 0:
				# First line: weight of the measured object
				data_object["weight"] = float(line)
			elif i == 1:
				# Second line: name of the corresponding calibration file (as "calibration_<number>.txt"),
				# stored as the zero-based index "<number> - 1"
				data_object["calibration_index"] = int(line.split("_")[1].split(".")[0]) - 1
			else:
				# Any other line: one set of sensor readings
				data_object["sensor_datas"].append(_parse_sensor_line(line))
	return data_object


def get_model_data(refresh=False):
	"""Returns the calibration and measurements data of the model.

	The data is loaded from the "calibration.json" and "measurements.json" cache files of the
	data directory ("data_dir") when both exist and "refresh" is False. Otherwise, the
	"calibration_*.txt" and "measurements_*.txt" files of the measurements directory
	("measurements_dir") are parsed, and the result is written to the two cache files.

	Args:
		refresh: if True, ignore the cache files and parse the text files again.

	Returns:
		A "(calibration_data, measurements_data)" tuple of lists of dictionaries, each with the
		keys "sensor_datas", "weight" and "calibration_index" (the last two are only present if
		the corresponding line of the file is not blank). Sensor vectors are tuples when the text
		files are parsed and lists when the data comes from the JSON cache.

	Raises:
		ValueError, IndexError: if a non-blank line of a text file does not have the expected format.
	"""
	measurements_file = os.path.join(data_dir, "measurements.json")
	calibration_file = os.path.join(data_dir, "calibration.json")
	# Make sure that the data directory exists
	os.makedirs(data_dir, exist_ok=True)
	# Use the cached data, if allowed and available
	if not refresh and os.path.exists(measurements_file) and os.path.exists(calibration_file):
		with open(measurements_file, "r") as f:
			measurements_data = json.load(f)
		with open(calibration_file, "r") as f:
			calibration_data = json.load(f)
		return calibration_data, measurements_data
	# Build the data lists, iterating over all the files of the measurements directory.
	# "os.listdir" returns the names in arbitrary order, hence they are sorted numerically to get a
	# deterministic result in which "calibration_<k>.txt" is the k-th calibration entry (for files
	# numbered contiguously from 1).
	# NOTE(review): presumably "calibration_index" is used to index "calibration_data" - confirm against ModelData
	calibration_data = []
	measurements_data = []
	files = sorted(os.listdir(measurements_dir), key=_data_file_sort_key)
	for file in tqdm(files, desc="\nReading data from files...", position=0):
		if not file.endswith(".txt"):
			continue
		is_calibration = file.startswith("calibration_")
		is_measurement = file.startswith("measurements_")
		if not is_calibration and not is_measurement:
			continue
		data_object = _read_data_file(os.path.join(measurements_dir, file))
		# Add the data object to the corresponding list
		if is_calibration:
			calibration_data.append(data_object)
		else:
			measurements_data.append(data_object)
	# Save the data to the cache files
	with open(measurements_file, "w") as f:
		json.dump(measurements_data, f, indent=4)
	with open(calibration_file, "w") as f:
		json.dump(calibration_data, f, indent=4)
	# Return the calibration and measurements data
	return calibration_data, measurements_data


In [None]:
# Seed the Python and PyTorch random number generators with the same fixed value
random_seed = 14
torch.manual_seed(random_seed)
random.seed(random_seed)

# Select the device: the GPU ("cuda") if one is available, the CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the calibration and measurement data (parsed again from the text files if REFRESH_DATA is True)
print("\nGetting the calibration and measurement data...")
calibration_data, measurements_data = get_model_data(REFRESH_DATA)
print(f"> DONE: Data loaded successfully (calibration: {len(calibration_data)}, measurements: {len(measurements_data)})")

# Weights & Biases (W&B) setup
# The logger, entity name and project name stay equal to None when the logging with W&B is disabled
wandb_project_name = "fnn"
wandb_logger = None
# wandb_api = None
wandb_entity = None
wandb_project = None
if WANDB_API_KEY in (None, ""):
	# No API key: nothing to set up
	print("\nNo W&B API key provided, logging with W&B disabled.")
else:
	# Login to W&B with the provided API key
	wandb.login(key=WANDB_API_KEY, relogin=True)
	# Reduce the output of the W&B library to the errors only
	os.environ["WANDB_SILENT"] = "true"
	# os.environ["WANDB_MODE"] = "dryrun"
	logging.getLogger("wandb").setLevel(logging.ERROR)
	# Create a "separator" experiment, only used to read the entity and project names
	wandb_logger = WandbLogger(log_model="all", project=wandb_project_name, name="- SEPARATOR -", offline=False)
	# wandb_api = wandb.Api()
	separator_run = wandb_logger.experiment
	wandb_entity = separator_run.entity
	wandb_project = separator_run.project
	# Close the "separator" experiment right away
	wandb_logger.experiment.finish(quiet=True)
	# Report the entity and project names, together with the URL of the W&B project dashboard
	wandb_dashboard_url = "https://wandb.ai/" + wandb_entity + "/" + wandb_project
	print("\nW&B API key provided, logging with W&B enabled (entity: " + wandb_entity + ", project: " + wandb_project + ")\n> URL: " + wandb_dashboard_url)

# Build the model data from the loaded calibration/measurements data and the data parameters
print("\nInitializing the model data...")
data = ModelData(
	measurements_data = measurements_data,
	calibration_data = calibration_data,
	normalize_sensor_data = NORMALIZE_SENSOR_DATA,
	normalization_range = NORMALIZATION_RANGE,
	max_weight=MAX_WEIGHT,
	data_split = DATA_SPLIT,
	batch_size = BATCH_SIZE,
	measurement_types = MEASUREMENTS,
	device = device
)
print("> DONE: Model data initialized successfully")

# Build the FNN model (its input dimension is given by the model data)
print("\nInitializing the FNN model...")
model = FNN(
	input_dim = data.input_dim,
	hidden_dim = FNN_HIDDEN_DIM,
	hidden_layers = FNN_HIDDEN_LAYERS,
	activation_fn = FNN_ACTIVATION,
	dropout = FNN_DROPOUT,
	optimizer = FNN_OPTIMIZER,
	output_dim = 1,
	lr = FNN_LR,
	loss_fn = nn.MSELoss(),
	device = device,
	normalize_data = NORMALIZE_SENSOR_DATA,
	normalization_range = NORMALIZATION_RANGE,
	max_weight=MAX_WEIGHT,
	# Only ask the model to log on W&B when a W&B logger was actually set up (i.e. an API key was provided)
	# NOTE(review): assumes "log_on_wandb" toggles the W&B logging done by the model - confirm against FNN
	log_on_wandb = wandb_logger is not None,
	log_on_console = False
)
print("> DONE: Model initialized successfully")

# Restore the model from the file (if it exists)
# if os.path.exists(model_path):
# 	print("\nRestoring the model from file...")
# 	model.load_state_dict(torch.load(model_path))
# 	print("> DONE: Model restored successfully")

# Train the model
# The training run gets its own W&B logger (None if the logging with W&B is disabled)
model_wandb_logger = None
if wandb_logger is not None:
	# Name the run after its main hyperparameters, to tell the runs apart in the W&B dashboard
	run_name = "FNN (" + str(FNN_HIDDEN_DIM) + "x" + str(FNN_HIDDEN_LAYERS) + " | LR:" + "{:.0e}".format(FNN_LR) + " | D:" + str(FNN_DROPOUT) + " | " + FNN_OPTIMIZER + " | " + FNN_ACTIVATION + ")"
	model_wandb_logger = WandbLogger(log_model="all", project=wandb_project, name=run_name, offline=False)
	# Also save ALL the constants and hyperparameters to the W&B (Weights & Biases) logger, including the data parameters
	model_wandb_logger.log_hyperparams(
		{
			"normalize_sensor_data": NORMALIZE_SENSOR_DATA,
			"normalization_range": NORMALIZATION_RANGE,
			"max_weight": MAX_WEIGHT,
			"measurements": MEASUREMENTS,
			"hidden_dim": FNN_HIDDEN_DIM,
			"hidden_layers": FNN_HIDDEN_LAYERS,
			"activation_fn": FNN_ACTIVATION,
			"dropout": FNN_DROPOUT,
			"optimizer": FNN_OPTIMIZER,
			"lr": FNN_LR,
			"data_split": DATA_SPLIT,
			"batch_size": BATCH_SIZE,
			"max_epochs": MAX_EPOCHS,
			"random_seed": random_seed
		}
	)
print("\nTraining the model...")
trainer = pl.Trainer(
	max_epochs=MAX_EPOCHS,
	num_sanity_val_steps=0,
	logger=model_wandb_logger,
	log_every_n_steps=-1,
	enable_checkpointing=False
)
try:
	trainer.fit(model, data)
finally:
	# Always finish the training experiment, even if the training fails or is interrupted,
	# so that no W&B run is left open in the kernel
	if model_wandb_logger is not None:
		model_wandb_logger.experiment.finish(quiet=True)
print("\n> DONE: Model trained successfully")

# Save the model weights (state dictionary) to file (to restore them later)
print("\nSaving the model to file...")
torch.save(model.state_dict(), model_path)

# Test the model
# print("\nTesting the model...")
# trainer.test(model, data.test_dataloader())
# print("\n> DONE: Model tested successfully")
