In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor

In [3]:
# Read the training and test sets from CSV files in the working directory.
train, test = (pd.read_csv(path) for path in ("train.csv", "test.csv"))

In [4]:
# Feature columns: every training column whose name mentions "Component" or "fraction".
input_cols = [
    name
    for name in train.columns
    if any(marker in name for marker in ("Component", "fraction"))
]
# Target columns: every training column whose name mentions "BlendProperty".
output_cols = [name for name in train.columns if "BlendProperty" in name]

In [5]:
import os
from getpass import getpass
from urllib.parse import quote

from sqlalchemy import create_engine

# Connection settings are read from the environment so that no secret is stored
# in the notebook. The non-secret settings fall back to the local defaults.
user = os.environ.get("FUEL_DB_USER", "root")
host = os.environ.get("FUEL_DB_HOST", "localhost")
database = os.environ.get("FUEL_DB_NAME", "fuel_blend")

# The password has no default: it comes from FUEL_DB_PASSWORD or, when that is
# unset/empty, from an interactive prompt (which needs a kernel with stdin).
# It is percent-encoded because characters such as "@" would otherwise be
# parsed as URL delimiters (e.g. "@" becomes "%40").
password = quote(
    os.environ.get("FUEL_DB_PASSWORD") or getpass("MySQL password: "),
    safe="",
)

# Build the URL from the variables above instead of repeating literal values.
engine = create_engine(f"mysql+mysqlconnector://{user}:{password}@{host}/{database}")



In [6]:
# Pull every row of the train_data table through the existing engine, then
# show the resulting frame as the cell output.
df = pd.read_sql(sql="SELECT * FROM train_data", con=engine)
df

Unnamed: 0,Component1_fraction,Component2_fraction,Component3_fraction,Component4_fraction,Component5_fraction,Component1_Property1,Component2_Property1,Component3_Property1,Component4_Property1,Component5_Property1,...,BlendProperty1,BlendProperty2,BlendProperty3,BlendProperty4,BlendProperty5,BlendProperty6,BlendProperty7,BlendProperty8,BlendProperty9,BlendProperty10
0,0.21,0.00,0.42,0.25,0.12,-0.021782,1.981250,0.020036,0.140315,1.032030,...,0.489143,0.607589,0.321670,-1.236050,1.601130,1.384660,0.305850,0.193460,0.580374,-0.762738
1,0.02,0.33,0.19,0.46,0.00,-0.224339,1.148040,-1.107840,0.149533,-0.354000,...,-1.257480,-1.475280,-0.437385,-1.402910,0.147941,-1.143240,-0.439171,-1.379040,-1.280990,-0.503626
2,0.08,0.08,0.18,0.50,0.16,0.457763,0.242591,-0.922492,0.908213,0.972003,...,1.784350,0.450467,0.622687,1.375610,-0.428790,1.161620,0.601289,0.872950,0.660000,2.024580
3,0.25,0.42,0.00,0.07,0.26,-0.577734,-0.930826,0.815284,0.447514,0.455717,...,-0.066422,0.483730,-1.865440,-0.046295,-0.163820,-0.209693,-1.840570,0.300293,-0.351336,-1.551910
4,0.26,0.16,0.08,0.50,0.00,0.120415,0.666268,-0.626934,2.725360,0.392259,...,-0.118913,-1.172400,0.301785,-1.787410,-0.493361,-0.528049,0.286344,-0.265192,0.430513,0.735073
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.50,0.12,0.00,0.26,0.12,0.279523,-0.054170,-0.391227,0.400222,1.032030,...,-0.028366,-0.327297,-0.316933,-1.294090,-0.530259,-0.421526,-0.320869,0.709627,-0.737244,-0.744289
1996,0.19,0.31,0.00,0.37,0.13,-0.887185,0.610050,0.178606,1.083150,-2.822750,...,-0.449245,0.156778,-0.367445,-0.938615,-0.577451,-0.209996,-0.370505,-0.195531,-0.032834,0.269718
1997,0.38,0.06,0.14,0.31,0.11,0.568978,-0.196759,-0.646318,-0.980070,1.032030,...,0.029135,0.164890,-0.092942,-1.134490,-0.437479,-0.695636,-0.101073,0.063650,0.624368,-0.477053
1998,0.50,0.16,0.00,0.18,0.16,-0.067453,0.321977,-0.137535,0.238507,0.017455,...,-0.232960,-0.464947,0.112536,-0.793522,-0.811272,-1.194910,0.100644,0.760116,-0.751394,-0.857598


In [7]:
# Training features and targets come from the training frame; the test frame
# only contributes its feature columns.
X, y = train[input_cols], train[output_cols]
X_test = test[input_cols]

In [8]:
# Learn the scaling statistics from the training features, then apply the same
# fitted scaler to both the training and the test features.
# NOTE(review): the scaler is fitted on every row of X, and the train/validation
# split is only made afterwards, so validation rows contribute to the scaling
# statistics — confirm this is acceptable.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Hold out 20% of the scaled rows for validation; the fixed seed keeps the
# split the same on every run.
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled,
    y,
    random_state=42,
    test_size=0.2,
)

In [10]:
# Base regressor configuration (not fitted in this cell).
xgb = XGBRegressor(
    # Reproducibility
    random_state=42,
    # Boosting schedule
    learning_rate=0.05,
    n_estimators=300,
    # Tree size
    max_depth=6,
    # Row / column subsampling
    colsample_bytree=0.8,
    subsample=0.8,
)

In [11]:
# Wrap the configured XGBoost regressor for multi-column targets and fit it on
# the training split; the fitted estimator is shown as the cell output.
model = MultiOutputRegressor(estimator=xgb)
model.fit(X_train, y_train)

0,1,2
,estimator,"XGBRegressor(...ree=None, ...)"
,n_jobs,

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.8
,device,
,early_stopping_rounds,
,enable_categorical,False


In [12]:
import joblib

# Persist the fitted scaler next to the model: the model was trained on
# features transformed by `scaler`, so any code that loads the model later
# needs the same fitted scaler to prepare its inputs.
joblib.dump(scaler, 'multioutput_scaler.pkl')

# Save the trained model to a file in the project folder. Kept as the last
# expression so the cell still displays the model file name.
joblib.dump(model, 'multioutput_model.pkl')

['multioutput_model.pkl']

In [13]:
# Predict the held-out validation rows.
y_val_pred = model.predict(X_val)

# One RMSE over all targets, plus a separate R² score for each target column.
rmse = np.sqrt(
    mean_squared_error(y_val, y_val_pred)
)
r2 = r2_score(y_val, y_val_pred, multioutput='raw_values')

# Report the overall error first, then one line per target.
print(f"Overall RMSE: {rmse:.4f}")
for col, score in zip(output_cols, r2):
    print(f"{col}: R² = {score:.4f}")

Overall RMSE: 0.2501
BlendProperty1: R² = 0.9295
BlendProperty2: R² = 0.9379
BlendProperty3: R² = 0.9501
BlendProperty4: R² = 0.9304
BlendProperty5: R² = 0.9849
BlendProperty6: R² = 0.9145
BlendProperty7: R² = 0.9480
BlendProperty8: R² = 0.9114
BlendProperty9: R² = 0.9095
BlendProperty10: R² = 0.9453


In [14]:
# Predict the scaled test features.
y_test_pred = model.predict(X_test_scaled)

# Submission layout: the test IDs first, followed by one column per target.
submission = pd.DataFrame(y_test_pred, columns=output_cols).assign(ID=test["ID"])
submission = submission[["ID", *output_cols]]

# Write the file without the index column and confirm on stdout.
submission.to_csv("blend_predictions.csv", index=False)
print("✅ Predictions saved to blend_predictions.csv")

✅ Predictions saved to blend_predictions.csv


In [15]:
import pandas as pd
from sqlalchemy import create_engine

# === Step 1: Load CSV file ===
csv_file = "blend_predictions.csv"   # <-- change to your CSV filename
# NOTE: this rebinds `df`, which earlier in the notebook held the train_data table.
df = pd.read_csv(csv_file)

print("Preview of CSV data:")
print(df.head())

# === Step 2: Connect to MySQL ===
# Reuse the `engine` created earlier in this notebook (mysql+mysqlconnector
# driver) rather than building a second engine from a duplicated connection
# URL with the password written inline.

# === Step 3: Load data into MySQL table ===
table_name = "predictions"   # <-- choose table name
# if_exists='replace' overwrites any existing table of the same name;
# index=False keeps the DataFrame index out of the table.
df.to_sql(table_name, con=engine, if_exists='replace', index=False)

Preview of CSV data:
   ID  BlendProperty1  BlendProperty2  BlendProperty3  BlendProperty4  \
0   1       -0.122131        0.113156        0.683808        0.276493   
1   2       -0.480374       -0.626711       -1.209690       -0.165829   
2   3        1.497324        0.921745        0.931253        0.751314   
3   4       -0.206875        0.238127        0.631574        0.027107   
4   5       -0.094945       -0.656973        1.091224        0.275456   

   BlendProperty5  BlendProperty6  BlendProperty7  BlendProperty8  \
0        0.305375        0.653795        0.629879        0.266046   
1       -0.709930        0.005959       -1.243659       -1.259334   
2        1.896341        1.353293        0.887263        1.722382   
3        1.903574       -0.258437        0.616768        0.934026   
4        2.523094        0.292833        1.042226        0.297502   

   BlendProperty9  BlendProperty10  
0       -0.297578         0.309459  
1       -0.697720         0.087960  
2        0.437

500