In [35]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor


In [36]:
df = pd.read_csv("zainfaisal_pakistan_concrete_data.csv")
print(f"Reading dataset... Total row: {df.shape[0]} rows")

df_local = pd.read_csv("localdataset.csv")
print(f"Reading dataset... Total row: {df_local.shape[0]} rows")

def process_kaggle_data(df):
    recipe_cols = [
        'Cement', 'Blast Furnace Slag', 'Fly Ash', 
        'Water', 'Superplasticizer', 
        'Coarse Aggregate', 'Fine Aggregate'
    ]

    df[recipe_cols] = df[recipe_cols].round(3)
    df_filtered = df[df['Age'].isin([7, 28])].copy()

    df_grouped = df_filtered.groupby(recipe_cols + ['Age'])['Strength'].mean().reset_index()
    df_pivot = df_grouped.pivot_table(
        index=recipe_cols, 
        columns='Age', 
        values='Strength'
    ).reset_index()
    df_clean = df_pivot.dropna(subset=[7, 28])
    df_clean = df_clean.rename(columns={7: 'Strength_7', 28: 'Strength_28'})
    print(f"Original Rows: {len(df)}")
    print(f"Usable Paired Mixes Found: {len(df_clean)}")
    
    return df_clean[['Strength_7', 'Strength_28']]


Reading dataset... Total row: 1030 rows
Reading dataset... Total row: 227 rows


In [37]:
# --- STEP 1: TRAIN BASE MODEL (ON KAGGLE) ---

# Process the Kaggle data to get pairs
kaggle_pairs = process_kaggle_data(df)

# X = 7-Day Strength, Y = 28-Day Strength
X_kaggle = kaggle_pairs[['Strength_7']]
y_kaggle = kaggle_pairs['Strength_28']

# We use Linear Regression here because the strength gain curve is usually highly linear/logarithmic
base_model = LinearRegression()
base_model.fit(X_kaggle, y_kaggle)

print(f"Base Model trained. Coefficient: {base_model.coef_[0]:.2f}")
print("Meaning: For every 1 MPa at 7 days, we expect {:.2f} MPa at 28 days (before weather adjustment).".format(base_model.coef_[0]))

Original Rows: 1030
Usable Paired Mixes Found: 114
Base Model trained. Coefficient: 1.07
Meaning: For every 1 MPa at 7 days, we expect 1.07 MPa at 28 days (before weather adjustment).


In [38]:
# --- PART 3: TRAINING THE CORRECTION MODEL ---

# 1. Generate Base Predictions (What the Lab Model thinks will happen)
# This assumes Standard Temperature (~20C) because that's what Kaggle is.
df_local['Base_Pred_Lab'] = base_model.predict(df_local[['Strength_7']])

# 2. Calculate the Residual (The "Temperature Effect")
# Residual = Actual Real World - Predicted Lab World
df_local['Residual'] = df_local['Strength_28'] - df_local['Base_Pred_Lab']

# 3. Train the Correction Model
# Input: Average Temperature
# Output: The deviation (Residual)
# We use Random Forest to capture non-linear effects (like the Crossover Effect)
correction_model = RandomForestRegressor(n_estimators=100, random_state=42)

# Reshape X to be 2D array: [[35], [10], [22]...]
X_correction = df_local[['avgTemp']]
y_correction = df_local['Residual']

correction_model.fit(X_correction, y_correction)

print("\nSUCCESS: Correction Model Trained on Temperature.")


SUCCESS: Correction Model Trained on Temperature.


In [39]:
def predict_with_temperature(strength_7, avg_temp):
    """
    Predicts 28-day strength using 7-day data + Temperature correction.
    """
    # 1. Get the Standard Lab Prediction
    base_pred = base_model.predict([[strength_7]])[0]
    
    # 2. Get the Temperature Correction
    # (e.g., High temps might produce a negative correction)
    temp_correction = correction_model.predict([[avg_temp]])[0]
    
    final_pred = base_pred + temp_correction
    
    return {
        "7_Day_Input": strength_7,
        "Avg_Temp": avg_temp,
        "Lab_Curve_Pred": round(base_pred, 2),
        "Temp_Correction": round(temp_correction, 2),
        "Final_Prediction": round(final_pred, 2)
    }

# --- TEST SCENARIOS ---
# Compare the same 7-day strength under Hot vs. Cold conditions
print("\n--- SCENARIO TEST: Same 7-Day Strength (25 MPa) ---")
print("Hot Weather (35°C):", predict_with_temperature(25, 35.0))
print("Standard Lab (20°C):", predict_with_temperature(25, 20.0))
print("Cold Weather (5°C):", predict_with_temperature(25, 5.0))


--- SCENARIO TEST: Same 7-Day Strength (25 MPa) ---
Hot Weather (35°C): {'7_Day_Input': 25, 'Avg_Temp': 35.0, 'Lab_Curve_Pred': np.float64(37.86), 'Temp_Correction': np.float64(-4.88), 'Final_Prediction': np.float64(32.97)}
Standard Lab (20°C): {'7_Day_Input': 25, 'Avg_Temp': 20.0, 'Lab_Curve_Pred': np.float64(37.86), 'Temp_Correction': np.float64(-1.63), 'Final_Prediction': np.float64(36.22)}
Cold Weather (5°C): {'7_Day_Input': 25, 'Avg_Temp': 5.0, 'Lab_Curve_Pred': np.float64(37.86), 'Temp_Correction': np.float64(-1.63), 'Final_Prediction': np.float64(36.22)}


