In [1]:
#IMPORTS

#File IO
import os
import glob

#Data manipulation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#Scikit learn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, SGDRegressor, BayesianRidge
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor

from sklearn.metrics import mean_squared_error, r2_score

#Misc
from tqdm import tqdm


In [2]:
#HYPERPARMETERS

test_proportion = .2
max_images = 200

In [3]:
#GET TARGET DATA

def get_cps(file):
    data = np.load(file)
    bands = data['rad'].reshape(128 * 128, 16)
    cps = np.log1p(data['l2_cps']).flatten().reshape(-1, 1)
    return bands, cps

In [4]:
#CREATE DATASET

scaler = StandardScaler()
file_list = glob.glob('../ABI_Data_Scaled/*.npz')

X = []
y = []

for i, file in enumerate(file_list):
    features, target = get_cps(file)
    X.append(features)
    y.append(target)
    if i == max_images: break

X = scaler.fit_transform(np.concatenate(X, axis=0))
y = np.concatenate(y, axis=0).flatten()

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_proportion, random_state=1)

In [5]:
#DIFFERENT MODELS

lin_model = LinearRegression()
forest_model = RandomForestRegressor(n_estimators=20, max_depth=7)
sgd_model = SGDRegressor(max_iter=1000, tol=1e-3, random_state=1)
ridge_model = BayesianRidge()
grad_boost_model = HistGradientBoostingRegressor()


In [6]:
#LINEAR REGRESSION
lin_model.fit(X_train, y_train);

y_pred = lin_model.predict(X_val)
r2 = r2_score(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)

print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")

R²: 0.2988
MSE: 1.6157


In [7]:
#SGD
sgd_model.fit(X_train, y_train);

y_pred = sgd_model.predict(X_val)
r2 = r2_score(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)

print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")

R²: 0.4210
MSE: 1.3343


In [8]:
#RIDGE
ridge_model.fit(X_train, y_train);

y_pred = ridge_model.predict(X_val)
r2 = r2_score(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)

print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")

  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)


R²: 0.4235
MSE: 1.3285


  [Vh.T, Vh / (eigen_vals_ + lambda_ / alpha_)[:, np.newaxis], XT_y]


In [9]:
#RANDOM FOREST
forest_model.fit(X_train, y_train);

y_pred = forest_model.predict(X_val)
r2 = r2_score(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)

print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")

R²: 0.6087
MSE: 0.9017


In [10]:
#HIST GRAD BOOST
grad_boost_model.fit(X_train, y_train);

y_pred = grad_boost_model.predict(X_val)
r2 = r2_score(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)

print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")

R²: 0.7392
MSE: 0.6009
