In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from PIL import Image
import torch
import model_loader
import pipeline
from transformers import CLIPTokenizer
from pathlib import Path

# Select the compute device: CUDA first, then (optionally) Apple MPS, else CPU.
# ALLOW_CUDA / ALLOW_MPS are manual switches to veto a backend even if present.
DEVICE = "cpu"
ALLOW_CUDA = True
ALLOW_MPS = False

# `torch.has_mps` is deprecated and only says whether the build includes MPS
# support, not whether an MPS device can actually be used, so only
# `torch.backends.mps.is_available()` is consulted. The getattr guard keeps
# this working on torch builds that ship no `mps` backend module at all.
_mps_backend = getattr(torch.backends, "mps", None)
_mps_available = _mps_backend is not None and _mps_backend.is_available()

if ALLOW_CUDA and torch.cuda.is_available():
    DEVICE = "cuda"
elif ALLOW_MPS and _mps_available:
    DEVICE = "mps"

print(f"Using device: {DEVICE}")

# Checkpoint holding the pretrained weights; loaded onto the selected device
model_file = "C:\\Users\\HP\\Desktop\\phenotypic traits prediction\\pytorch-stable-diffusion-main\\Data\\v1-5-pruned-emaonly.ckpt"
models = model_loader.preload_models_from_standard_weights(model_file, DEVICE)

# CLIP tokenizer built from its vocabulary and merges files
tokenizer = CLIPTokenizer(
    vocab_file="../data/vocab.json",
    merges_file="../data/merges.txt",
)

# Load and preprocess dataset
df = pd.read_csv("C:\\Users\\HP\\Downloads\\enhanced_plant_gene_modification_realistic.csv")

# Impute missing values per dtype. A blanket fillna("Unknown") would turn every
# numeric column containing a gap into an object column, which the later
# "ensure numeric" step then label-encodes as if it were categorical.
for col_name in df.columns:
    column = df[col_name]
    if not column.isna().any():
        continue
    if pd.api.types.is_numeric_dtype(column):
        median_value = column.median()
        # An all-NaN column has no median; fall back to 0 so no NaN survives.
        df[col_name] = column.fillna(0 if pd.isna(median_value) else median_value)
    else:
        df[col_name] = column.fillna("Unknown")

# Normalise headers so later lookups can rely on Title_Case names
df.columns = df.columns.str.strip().str.title()

# Create 'Is_Crispr' column based on 'Crispr_Test_Evidence':
# 1 when the cell carries any evidence text, 0 for the known "no evidence" markers.
if 'Crispr_Test_Evidence' in df.columns:
    # '0.0' is listed next to '0' because a numeric zero stored in a float
    # column stringifies as '0.0'.
    no_evidence_markers = ['no experimental data', 'unknown', '0', '0.0', '']
    evidence_text = df['Crispr_Test_Evidence'].astype(str).str.strip().str.lower()
    df['Is_Crispr'] = (~evidence_text.isin(no_evidence_markers)).astype(int)
else:
    print("Warning: 'Crispr_Test_Evidence' column not found. Defaulting 'Is_Crispr' to 0.")
    df['Is_Crispr'] = 0

# Label-encode every categorical column that is present in the dataset.
# Each fitted encoder is stored in le_dict under its column name so the same
# string -> integer mapping can be applied to new inputs later.
le_dict = {}
categorical_columns = ['Plant_Name', 'Species', 'Gene_Name', 'Gene_Function']

for col in (name for name in categorical_columns if name in df.columns):
    encoder = LabelEncoder()
    lowered = df[col].astype(str).str.lower()  # case-insensitive labels
    df[col] = encoder.fit_transform(lowered)
    le_dict[col] = encoder

# Columns fed to the model, in the order they are passed to the scaler/network
features = [
    'Plant_Name',
    'Species',
    'Gene_Name',
    'Is_Crispr',
    'Trait_Change_Potential',
    'Baseline_Trait_Value',
    'Predicted_Trait_Change',
    'Drought_Tolerance_Index',
    'Heat_Stress_Impact',
    'Expression_Level',
]

# Any feature column that is still non-numeric is label-encoded from its
# string representation; numeric columns are left untouched.
for col in features:
    if np.issubdtype(df[col].dtype, np.number):
        continue
    df[col] = LabelEncoder().fit_transform(df[col].astype(str))

# Target variable creation: average four trait scores, then binarise at the
# dataset mean (1 = above-average score, 0 = otherwise).
# NOTE(review): all four averaged columns are also in `features`, so the label
# is a deterministic function of the inputs and the reported accuracy mostly
# measures how well the network re-learns this formula - confirm this is intended.
df['Target'] = (df['Trait_Change_Potential'] + df['Predicted_Trait_Change'] +
                df['Drought_Tolerance_Index'] + df['Heat_Stress_Impact']) / 4
df['Target'] = (df['Target'] > df['Target'].mean()).astype(int)

# Feature matrix and label vector
X = df[features]
y = df['Target']
feature_order = X.columns.tolist()

# Split BEFORE scaling: fitting the scaler on the full dataset would leak the
# test rows' mean/variance into the training features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features using statistics learned from the training rows only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Fully scaled matrix, kept so any code that still refers to X_scaled keeps working
X_scaled = scaler.transform(X)

# Define the Neural Network model
def create_model(input_dim):
    """Build and compile the binary-classification network.

    The network is a stack of three ReLU Dense layers (128, 64, 32 units) with
    batch normalisation after the first one and 30% dropout after each,
    followed by a single sigmoid output unit.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 0.001),
        binary cross-entropy loss and the accuracy metric.
    """
    # Imported here because the file-level imports do not include Input.
    from tensorflow.keras import Input

    model = Sequential([
        # Declare the input shape with an explicit Input layer instead of
        # passing `input_dim` to Dense, which newer Keras versions warn about.
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Train the model
model = create_model(X_train.shape[1])
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Early stopping (with restore_best_weights) picks the epoch based on the
# validation data, so that data must not be the test split. Carve a validation
# subset out of the training data instead.
X_fit, X_val, y_fit, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
history = model.fit(X_fit, y_fit, epochs=20, batch_size=32, validation_data=(X_val, y_val), callbacks=[early_stopping])

# Score once on the test split, which played no part in training or early stopping
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test loss: {test_loss:.4f} - test accuracy: {test_accuracy:.4f}")

# Prediction function
def predict_modification(plant_name, species, gene_name, target_trait, show_output=True):
    """Predict whether a gene modification is likely to succeed for a plant.

    Relies on module-level state built earlier in the script: ``le_dict``,
    ``df``, ``feature_order``, ``scaler``, ``model`` and, for the illustration,
    ``pipeline``, ``models``, ``DEVICE`` and ``tokenizer``.

    Args:
        plant_name: Plant name; matched case-insensitively against the dataset.
        species: Species name; matched case-insensitively.
        gene_name: Gene name; matched case-insensitively.
        target_trait: Free-text trait name used in the description and in the
            image prompt.
        show_output: When True (default), print the results and generate and
            display an illustration. When False, only the result dict is
            computed, and the image generation is skipped because its result
            is only displayed, never returned.

    Returns:
        A dict with the keys "Modification Possible", "Probability",
        "Trait Change Description" and "Extra gene required", or a dict with
        the single key "Error" when an input is unknown to the encoders or the
        plant/species/gene combination does not occur in the dataset.
    """
    raw_inputs = {
        'Plant_Name': plant_name,
        'Species': species,
        'Gene_Name': gene_name,
    }
    try:
        # The encoders were fitted on lower-cased strings; surrounding
        # whitespace is stripped so typed input does not fail on a stray space.
        encoded_inputs = {
            column: le_dict[column].transform([str(value).strip().lower()])[0]
            for column, value in raw_inputs.items()
        }
    except (KeyError, ValueError) as e:
        return {"Error": f"Invalid input: {e}"}

    # The combination must exist as a row of the (already encoded) dataset
    matching_gene = df[
        (df['Plant_Name'] == encoded_inputs['Plant_Name']) &
        (df['Species'] == encoded_inputs['Species']) &
        (df['Gene_Name'] == encoded_inputs['Gene_Name'])
    ]

    if matching_gene.empty:
        return {"Error": "The specified gene is not suitable for the given plant."}

    # Default input values for the features the caller does not supply
    input_data = {
        'Is_Crispr': 1,
        'Trait_Change_Potential': 0.5,
        'Baseline_Trait_Value': 1.0,
        'Predicted_Trait_Change': 0.5,
        'Drought_Tolerance_Index': 0.5,
        'Heat_Stress_Impact': 0.5,
        'Expression_Level': 1.0
    }
    input_data.update(encoded_inputs)

    # Convert to a one-row DataFrame in training column order, then scale
    input_df = pd.DataFrame([input_data])[feature_order]
    input_scaled = scaler.transform(input_df)

    # Prediction; Keras' own progress output is silenced only in quiet mode
    predict_kwargs = {} if show_output else {"verbose": 0}
    prediction_prob = float(model.predict(input_scaled, **predict_kwargs)[0][0])
    modification_possible = prediction_prob > 0.5
    # Derived from the same decision so the two answers can never disagree
    # (previously both were negative at exactly 0.5).
    extra_gene_required = "No" if modification_possible else "Yes"
    trait_change_description = f"Trait change for {target_trait}, with a {prediction_prob * 100:.2f}% success rate."

    if show_output:
        # Print results before generating image
        print("\n🔍 **Prediction Results:**")
        print(f"- Modification Possible: {modification_possible}")
        print(f"- Probability: {prediction_prob * 100:.2f}%")
        print(f"- Trait Change Description: {trait_change_description}")
        print(f"- Extra gene required: {extra_gene_required}")

        # Generate image only after displaying results
        prompt = f"A genetically modified {plant_name} with enhanced {target_trait}, highly detailed plant, ultra sharp, 100mm lens, 4k resolution."
        output_image = pipeline.generate(
            prompt=prompt,
            uncond_prompt="",
            input_image=None,
            strength=0.9,
            do_cfg=True,
            cfg_scale=8,
            sampler_name="ddpm",
            n_inference_steps=50,
            seed=42,
            models=models,
            device=DEVICE,
            idle_device="cpu",
            tokenizer=tokenizer,
        )

        Image.fromarray(output_image).show()

    return {
        "Modification Possible": modification_possible,
        "Probability": f"{prediction_prob * 100:.2f}%",
        "Trait Change Description": trait_change_description,
        "Extra gene required": extra_gene_required
    }

# Ask the user for the four query fields, one prompt at a time and in this order
prompts = (
    "Enter Plant Name: ",
    "Enter Species: ",
    "Enter Gene Name: ",
    "Enter Target Trait: ",
)
plant_name, species, gene_name, target_trait = map(input, prompts)

# Run the prediction for the values just entered
result = predict_modification(
    plant_name=plant_name,
    species=species,
    gene_name=gene_name,
    target_trait=target_trait,
)


Using device: cuda


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
325/325 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step - accuracy: 0.7806 - loss: 0.4418 - val_accuracy: 0.9700 - val_loss: 0.0967
Epoch 2/20
325/325 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.9331 - loss: 0.1584 - val_accuracy: 0.9708 - val_loss: 0.0757
Epoch 3/20
325/325 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.9487 - loss: 0.1209 - val_accuracy: 0.9823 - val_loss: 0.0590
Epoch 4/20
325/325 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.9486 - loss: 0.1251 - val_accuracy: 0.9815 - val_loss: 0.0639
Epoch 5/20
325/325 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.9562 - loss: 0.1020 - val_accuracy: 0.9858 - val_loss: 0.0494
Epoch 6/20
325/325 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.9531 - loss: 0.1095 - val_accuracy: 0.9881 - val_loss: 0.0571
Epoch 7/20
325/325

100%|██████████| 50/50 [46:12<00:00, 55.44s/it]   
