In [None]:
import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# Synthetic DCRI dataset generation
#
# Draws independent uniform random features for `num_samples` rows, derives
# the target column `alpha_score` from a fixed weighted formula plus Gaussian
# noise, and writes the result to OUTPUT_CSV.
# ---------------------------------------------------------------------------

# Tunable settings, gathered in one place.
SEED = 42
OUTPUT_CSV = "synthetic_dcri_dataset_v2.csv"

# Seed the legacy global NumPy RNG so every run reproduces the same dataset.
# The random draws below must stay in their current order: each call consumes
# the shared stream, so reordering them changes every generated value.
np.random.seed(SEED)

# Crop types; a crop's position in this list is its integer code.
FARMER_CROPS = ["Tomato", "Potato", "Pepper", "Corn", "Apple", "Grape"]
crop_map = {c: i for i, c in enumerate(FARMER_CROPS)}

# Number of samples
num_samples = 2000

# Generate random synthetic features. Every column is sampled independently
# and uniformly over the stated range.
data = {
    "crop_type": np.random.choice(list(crop_map.values()), num_samples),
    "disease_percent": np.random.uniform(0, 100, num_samples),
    "soil_moisture": np.random.uniform(10, 90, num_samples),
    "temperature": np.random.uniform(10, 40, num_samples),
    "humidity": np.random.uniform(30, 90, num_samples),
    "rainfall": np.random.uniform(50, 300, num_samples),
    "soil_ph": np.random.uniform(4.5, 8.5, num_samples),
    "region_risk_factor": np.random.uniform(0, 1, num_samples)
}

# Create DataFrame
df = pd.DataFrame(data)

# Synthetic formula for DCRI (alpha_score).
# Note: crop_type, humidity and rainfall do not appear in the formula, so
# they carry no information about the target.
df["alpha_score"] = (
    0.4 * (df["disease_percent"] / 100) +             # disease % impact
    0.2 * np.abs(df["temperature"] - 25) / 25 +       # distance from the 25-degree reference
    0.1 * (1 - df["soil_moisture"] / 100) +           # low moisture increases risk
    0.1 * df["region_risk_factor"] +                  # regional risk impact
    0.1 * np.abs(df["soil_ph"] - 6.5) / 4 +           # distance from pH 6.5; at most 2, so this term is <= 0.05
    np.random.normal(0, 0.02, num_samples)            # noise
)

# Clamp to [0, 1]. This clips rather than rescales: the noise-free terms stay
# well inside the interval, so in practice it only removes the slightly
# negative values that the noise can produce on very low-risk rows.
df["alpha_score"] = df["alpha_score"].clip(0, 1)

# Human-readable crop label: look the integer code up by list position.
df["crop_name"] = df["crop_type"].map(dict(enumerate(FARMER_CROPS)))

# Reorder columns for clarity
df = df[[
    "crop_name", "crop_type", "disease_percent", "soil_moisture",
    "temperature", "humidity", "rainfall", "soil_ph",
    "region_risk_factor", "alpha_score"
]]

# Sanity checks before anything is written to disk.
assert len(df) == num_samples, "unexpected number of rows"
assert df["crop_name"].notna().all(), "crop code without a matching name"
assert df["alpha_score"].between(0, 1).all(), "alpha_score outside [0, 1]"

# Save dataset
df.to_csv(OUTPUT_CSV, index=False)
print(f"✅ Synthetic DCRI dataset created and saved as '{OUTPUT_CSV}'")

# Display first few rows
print(df.head(10))


✅ Synthetic DCRI dataset created and saved as 'synthetic_dcri_dataset_v2.csv'
  crop_name  crop_type  disease_percent  soil_moisture  temperature  \
0      Corn          3        32.901744      53.842589    27.628810   
1     Apple          4        47.018259      51.685855    34.617754   
2    Pepper          2        74.427907      38.016239    21.768011   
3     Apple          4        87.183278      64.197873    23.426556   
4     Apple          4        43.365533      70.680150    31.900052   
5    Potato          1         4.366166      11.170261    20.732643   
6    Pepper          2        16.167254      26.579384    11.917028   
7    Pepper          2         2.086114      67.058869    12.230038   
8    Pepper          2        65.326302      40.929614    13.460884   
9     Apple          4        33.231463      62.998716    36.090440   

    humidity    rainfall   soil_ph  region_risk_factor  alpha_score  
0  42.470623  165.590446  4.576391            0.347524     0.282371 

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import joblib

# ---------------------------------------------------------------------------
# Train, evaluate and persist the DCRI regressor
#
# Reads the synthetic dataset from DATASET_CSV, fits a StandardScaler and a
# RandomForestRegressor on an 80/20 train/test split, prints MAE and R² on the
# held-out part, and saves the pair to MODEL_PATH.
# ---------------------------------------------------------------------------

# Paths and column names, gathered in one place.
DATASET_CSV = "synthetic_dcri_dataset_v2.csv"
MODEL_PATH = "dcri_model.pkl"
# Column order matters: the scaler and model are fitted on columns in exactly
# this order, so inference inputs must be arranged the same way.
FEATURE_COLUMNS = [
    "crop_type", "disease_percent", "soil_moisture", "temperature",
    "humidity", "rainfall", "soil_ph", "region_risk_factor"
]
TARGET_COLUMN = "alpha_score"

# Load dataset
df = pd.read_csv(DATASET_CSV)

# Fail fast on a malformed file. "crop_name" is only read at the very end, so
# without this check a missing column would surface after training and saving.
missing_columns = [
    col for col in FEATURE_COLUMNS + [TARGET_COLUMN, "crop_name"]
    if col not in df.columns
]
if missing_columns:
    raise KeyError(f"{DATASET_CSV} is missing required columns: {missing_columns}")

# Select features and target
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: statistics are learned from the training split only and
# then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a Random Forest Regressor
model = RandomForestRegressor(
    n_estimators=200,
    max_depth=12,
    random_state=42,
    n_jobs=-1
)
model.fit(X_train_scaled, y_train)

# Evaluate model on the held-out split
y_pred = model.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("✅ Model trained successfully!")
print(f"Mean Absolute Error: {mae:.4f}")
print(f"R² Score: {r2:.4f}")

# Save the model and scaler together as a (model, scaler) tuple, in that
# order, so a loader can apply the same scaling before predicting.
joblib.dump((model, scaler), MODEL_PATH)
print(f"💾 Model saved as '{MODEL_PATH}'")

# Sample prediction test. The 0:1 slice keeps a one-row DataFrame rather than
# a Series, which is the 2-D shape passed to scaler.transform.
sample = X_test.iloc[0:1]
predicted_alpha = model.predict(scaler.transform(sample))
# sample.index[0] is the row's label in df, so .loc fetches the matching crop name.
print(f"\nExample Crop: {df.loc[sample.index[0], 'crop_name']}")
print(f"Predicted DCRI (α): {predicted_alpha[0]:.3f}")


✅ Model trained successfully!
Mean Absolute Error: 0.0259
R² Score: 0.9303
💾 Model saved as 'dcri_model.pkl'

Example Crop: Corn
Predicted DCRI (α): 0.330
