In [1]:
from dash import Dash, dcc, html, Input, Output, State, ctx, callback, ALL, MATCH, no_update, Patch, dash_table
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import base64
import io
import os
import joblib
import time
from tkinter import filedialog, Tk
from scipy.stats import gaussian_kde
import dash_bootstrap_components as dbc
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import dash
from dash import dcc, html, dash_table
from dash.dependencies import Input, Output, State
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import base64
import io
import joblib  
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

In [2]:

#################################################################################################################################
def load_model():    
    def load(model_path):
        try:
            model = joblib.load(model_path)
            return model
        except Exception as e:
            print(f"Error loading model: {e}")
            return None
    root = Tk()
    root.withdraw()  
    file_path = filedialog.askopenfilename(title="   ", filetypes=[("Joblib files", "*.pkl *.joblib"), ("All files", "*.*")])
    model = load(file_path)
    return model
#################################################################################################################################
def load_csv():
    def load(file_path):
        try:
            data = pd.read_csv(file_path)
            return data
        except Exception as e:
            print(f"Error loading CSV file: {e}")
            return None
    root = Tk()
    root.withdraw()  
    file_path = filedialog.askopenfilename(title="      ",filetypes=[("CSV files", "*.csv"), ("All files", "*.*")])
    data = load(file_path)
    return data
##################################################################################################################################
def test_model(model , data ):
    try:
        predictions = model.predict(data)
        return predictions
    except Exception as e:
        print(f"Error making predictions: {e}")
        return None
##################################################################################################################################
def compwper(model , data , output_label ):
    try:
        predictions = test_model(model , data.drop(columns=[output_label]))
        return predictions , data[output_label]
    except Exception as e:
        print(f"Error making predictions: {e}")
        return None
##############################################################plots###############################################################
def plot_distribution_of_clas_in_data(data, output_label):
    try:
        counts = data[output_label].value_counts().sort_index()
        classes = counts.index.tolist()
        values = counts.values.tolist()
        total = sum(values)
        percentages = [(v / total) * 100 for v in values]
        percentage_labels = [f"{p:.2f}%" for p in percentages]

        min_value = min(values)
        max_value = max(values)

        fig = go.Figure()
        fig.add_trace(go.Bar(
            x=classes,
            y=values,
            marker_color='royalblue',
            name='Class Count',
            text=percentage_labels,
            textposition='auto'
        ))

        fig.add_shape(
            type="line",
            x0=-0.5,
            x1=len(classes)-0.5,
            y0=min_value,
            y1=min_value,
            line=dict(color="green", dash="dash"),
        )
        fig.add_shape(
            type="line",
            x0=-0.5,
            x1=len(classes)-0.5,
            y0=max_value,
            y1=max_value,
            line=dict(color="red", dash="dash"),
        )

        fig.add_annotation(
            x=len(classes) - 1, 
            y=(min_value + max_value) / 2,
            ax=len(classes) - 1,
            ay=min_value,
            xref='x', yref='y',
            axref='x', ayref='y',
            showarrow=True,
            arrowhead=3,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="orange",
            text=f"Diff: {max_value - min_value}",
            font=dict(color="orange")
        )

        fig.update_layout(
            title=f"Class Distribution of '{output_label}' with Min/Max Lines and Percentages",
            xaxis_title="Class",
            yaxis_title="Count",
            showlegend=False
        )

        return fig
    except Exception as e:
        print(f"Error in plotting class balance: {e}")
    

##################################################################################################################################

external_stylesheets = [
    "https://smart-radius.com/assets/libs/bootstrap/css/bootstrap.rtl.min.css",
    "https://smart-radius.com/assets/css/styles.css",
    "https://smart-radius.com/assets/css/icons.min.css",
    "https://smart-radius.com/assets/css/font_cairo.css"
]

#######################################################################################################################################
MODEL_DIR = "models"
os.makedirs(MODEL_DIR, exist_ok=True)
def train_machine_learning_model(X_train, y_train, X_test, y_test, model_name):
    print(f"Starting Machine Learning Model training: {model_name}.")
    model_path = None
    accuracy = np.random.uniform(0.90, 0.99)
    time.sleep(3)
    model_filename = f"{model_name}_model.joblib"
    model_path = os.path.join(MODEL_DIR, model_filename)
    try:
        with open(model_path, "w") as f:
            f.write(f"Placeholder for ML model: {model_name}\nSimulated Accuracy: {accuracy:.4f}")
        print(f"Machine Learning Model {model_name} trained and saved to: {model_path}")
    except Exception as e:
        print(f"Error saving Machine Learning Model {model_name}: {e}")
        return f"Machine Learning Model {model_name} trained but failed to save: {e}", None, 0.0
    return f"Machine Learning Model {model_name} trained successfully (simulation). Accuracy: {accuracy:.4f}", model_path, accuracy





def train_deep_learning_model(X_train, y_train, X_test, y_test, layer_configs, num_layers):
    print(f"Starting Deep Learning Model training with {num_layers} layers.")
    model_path = None
    accuracy = np.random.uniform(0.7, 0.95)  
    time.sleep(5)
    model_filename = "deep_learning_model_placeholder.txt"
    model_path = os.path.join(MODEL_DIR, model_filename)
    try:
        with open(model_path, "w") as f:
            f.write(f"Placeholder for DL model. Configs: {layer_configs}\nSimulated Accuracy: {accuracy:.4f}")
        print(f"Deep Learning Model trained and saved to: {model_path}")
    except Exception as e:
        print(f"Error saving Deep Learning Model: {e}")
        return f"Deep Learning Model trained but failed to save: {e}", None, 0.0
    return f"Deep Learning Model trained successfully (simulation). Accuracy: {accuracy:.4f}", model_path, accuracy

##################################################################################
def plot_3d_surface_with_dropdown(x, y, z, surface_color=None):
    fig = go.Figure()
    fig.add_trace(go.Surface(
        z=z,
        colorscale="Viridis",
        surfacecolor=surface_color if surface_color is not None else z
    ))

    fig.update_layout(
        width=800,
        height=900,
        autosize=False,
        margin=dict(t=0, b=0, l=0, r=0),
        template="plotly_white",
        updatemenus=[
            dict(
                type="buttons",
                direction="left",
                buttons=[
                    dict(args=["type", "surface"], label="3D Surface", method="restyle"),
                    dict(args=["type", "heatmap"], label="Heatmap", method="restyle")
                ],
                pad={"r": 10, "t": 10},
                showactive=True,
                x=0.11,
                xanchor="left",
                y=1.1,
                yanchor="top"
            ),
        ],
        annotations=[
            dict(text="Trace type:", showarrow=False, x=0, y=1.08, yref="paper", align="left")
        ]
    )
    fig.update_scenes(aspectratio=dict(x=1, y=1, z=0.7), aspectmode="manual")
    return fig


In [3]:


def plot_distribution_of_clas_in_data(data, output_label):
    try:
        if pd.api.types.is_numeric_dtype(data[output_label]):
            values = data[output_label].dropna()
            
            mean_val = values.mean()
            median_val = values.median()
            mode_val = values.mode()[0] if not values.mode().empty else None
            
            # IQR Calculation for Outliers
            Q1 = np.percentile(values, 25)
            Q3 = np.percentile(values, 75)
            IQR = Q3 - Q1
            lower_bound = Q1 - 1.5 * IQR
            upper_bound = Q3 + 1.5 * IQR
            outliers = values[(values < lower_bound) | (values > upper_bound)]

            # Histogram
            fig = go.Figure()
            fig.add_trace(go.Histogram(
                x=values,
                nbinsx=30,
                name='Histogram',
                marker_color='lightblue',
                opacity=0.7
            ))

            # KDE Curve
            kde = gaussian_kde(values)
            x_range = np.linspace(values.min(), values.max(), 500)
            fig.add_trace(go.Scatter(
                x=x_range,
                y=kde(x_range) * len(values) * (values.max()-values.min()) / 30,
                mode='lines',
                name='KDE',
                line=dict(color='darkblue', width=2)
            ))

            # Add Mean / Median / Mode
            fig.add_vline(x=mean_val, line=dict(color="blue", dash="dot"), annotation_text=f"Mean: {mean_val:.2f}", annotation_position="top")
            fig.add_vline(x=median_val, line=dict(color="green", dash="dash"), annotation_text=f"Median: {median_val:.2f}", annotation_position="top")
            if mode_val is not None:
                fig.add_vline(x=mode_val, line=dict(color="orange", dash="solid"), annotation_text=f"Mode: {mode_val:.2f}", annotation_position="top")

            # Outlier markers
            if not outliers.empty:
                fig.add_trace(go.Scatter(
                    x=outliers,
                    y=[0]*len(outliers),
                    mode='markers',
                    marker=dict(color='red', size=8, symbol='x'),
                    name='Outliers',
                    showlegend=True
                ))

            fig.update_layout(
                title=f"Numerical Distribution of '{output_label}' with Stats & Outliers",
                xaxis_title=output_label,
                yaxis_title="Count",
                showlegend=True
            )

        else:
            # Categorical as before
            counts = data[output_label].value_counts().sort_index()
            classes = counts.index.tolist()
            values = counts.values.tolist()
            total = sum(values)
            percentages = [(v / total) * 100 for v in values]
            percentage_labels = [f"{p:.2f}%" for p in percentages]

            min_value = min(values)
            max_value = max(values)

            fig = go.Figure()
            fig.add_trace(go.Bar(
                x=classes,
                y=values,
                marker_color='royalblue',
                name='Class Count',
                text=percentage_labels,
                textposition='auto'
            ))

            fig.add_shape(
                type="line",
                x0=-0.5,
                x1=len(classes)-0.5,
                y0=min_value,
                y1=min_value,
                line=dict(color="green", dash="dash"),
            )
            fig.add_shape(
                type="line",
                x0=-0.5,
                x1=len(classes)-0.5,
                y0=max_value,
                y1=max_value,
                line=dict(color="red", dash="dash"),
            )

            fig.add_annotation(
                x=len(classes) - 1, 
                y=(min_value + max_value) / 2,
                ax=len(classes) - 1,
                ay=min_value,
                xref='x', yref='y',
                axref='x', ayref='y',
                showarrow=True,
                arrowhead=3,
                arrowsize=1,
                arrowwidth=2,
                arrowcolor="orange",
                text=f"Diff: {max_value - min_value}",
                font=dict(color="orange")
            )

            fig.update_layout(
                title=f"Class Distribution of '{output_label}' with Min/Max Lines and Percentages",
                xaxis_title="Class",
                yaxis_title="Count",
                showlegend=False
            )

        return fig

    except Exception as e:
        print(f"Error in plotting distribution: {e}")


In [4]:
global global_df

In [5]:
def test():
    app = html.Div([
    html.H1("Model Performance Analyzer"),

    # Hidden Divs for storing intermediate data
    dcc.Store(id='data-store1'),  # To store the uploaded DataFrame as JSON
    dcc.Store(id='model-status-store'), # To store a trigger when model is uploaded
    dcc.Store(id='processed-data-store'), # To store DataFrame with predictions
    dcc.Store(id='metrics-store'), # To store calculated metrics

    html.Div([
        html.Div([
            html.H3("Step 1: Upload Data (CSV)"),
            dcc.Upload(
                id='upload-data',
                children=html.Div(['Drag and Drop or ', html.A('Select CSV File')]),
                style={
                    'width': '95%', 'height': '60px', 'lineHeight': '60px',
                    'borderWidth': '1px', 'borderStyle': 'dashed',
                    'borderRadius': '5px', 'textAlign': 'center', 'margin': '10px'
                },
                multiple=False # Allow only single file upload
            ),
            html.Div(id='output-data-upload-status'),
        ], style={'width': '48%', 'display': 'inline-block', 'verticalAlign': 'top'}),

        html.Div([
            html.H3("Step 2: Upload Model File (.pkl or .joblib)"),
            dcc.Upload(
                id='upload-model',
                children=html.Div(['Drag and Drop or ', html.A('Select Model File')]),
                style={
                    'width': '95%', 'height': '60px', 'lineHeight': '60px',
                    'borderWidth': '1px', 'borderStyle': 'dashed',
                    'borderRadius': '5px', 'textAlign': 'center', 'margin': '10px'
                },
                multiple=False
            ),
            html.Div(id='output-model-upload-status'),
        ], style={'width': '48%', 'float': 'right', 'display': 'inline-block', 'verticalAlign': 'top'}),
    ]),

    html.Hr(),

    html.Div(id='analysis-section', children=[
        html.Div([
            html.Label("Step 3: Select Column for Analysis:"),
            dcc.Dropdown(id='column-dropdown', placeholder="Upload data first"),
        ], style={'width': '48%', 'display': 'inline-block', 'marginBottom': '20px'}),

        html.Hr(),

        # Plot 1
        html.Div([
            html.H3("Plot 1: Prediction Correctness by Category"),
            dcc.Graph(id='plot1-bar-chart')
        ], style={'width': '48%', 'display': 'inline-block', 'verticalAlign': 'top'}),

        # Plot 2
        html.Div([
            html.H3("Plot 2: Error Distribution by Category"),
            dcc.Graph(id='plot2-pie-chart')
        ], style={'width': '48%', 'float': 'right', 'display': 'inline-block', 'verticalAlign': 'top'}),

        html.Hr(style={'clear': 'both', 'marginTop': '20px', 'marginBottom': '20px'}),

        # Plot 3
        html.Div([
            html.H3("Plot 3: Overall Model Performance Indicators"),
            dcc.Graph(id='plot3-indicators')
        ], style={'width': '98%', 'margin': 'auto'})
    ], style={'display': 'none'}) # Initially hidden until data and model are ready
])
    return app

In [6]:
def get_home_layout():
    from dash import  dcc, html
    import pandas as pd
    import dash_bootstrap_components as dbc
    app= html.Div([
    dcc.Store(id='data-store'), # لتخزين بيانات الـ DataFrame

    html.H1(" Home ", style={'textAlign': 'center', 'color': '#333'}),

    # قسم رفع الملف والرسم البياني التوزيعي
    html.Div([
        html.Label("Upload CSV File:", className="text-white"),
        dcc.Upload(
            id='upload-data',
            children=html.Div(['Drag and drop or ', html.A('Select from your device')]),
            style={
                'width': '100%', 'height': '60px', 'lineHeight': '60px',
                'borderWidth': '1px', 'borderStyle': 'dashed',
                'borderRadius': '5px', 'textAlign': 'center', 'margin-bottom': '10px'
            },
            multiple=False
        ),
        html.Label("Select Column for Distribution Plot:", className="text-white mt-2"),
        dcc.Dropdown(id='x-axis-dropdown', placeholder='Select X-axis column'),
        dcc.Graph(id='scatter-chart')
    ], className="container p-4 bg-light text-right", dir="rtl"), # text-right و dir="rtl" لليمين لليسار


    html.Label("Select Columns for 3D Plot:", className="text-dark mt-3 d-block text-center"), # تعديل ليكون ظاهر
    html.Div([
    dcc.Dropdown(
        id='x-axis-dropdown1',
        placeholder='Select X-axis column (3D)',
        style={'width': '250px', 'fontSize': '18px'}
    ),
    dcc.Dropdown(
        id='y-axis-dropdown1',
        placeholder='Select Y-axis column (3D)',
        style={'width': '250px', 'fontSize': '18px'}
    ),
    dcc.Dropdown(
        id='z-axis-dropdown1',
        placeholder='Select Z-axis column (3D)',
        style={'width': '250px', 'fontSize': '18px'}
    ),
    dcc.Dropdown(
        id='color-dropdown',
        placeholder='Select Color column (3D - optional)',
        style={'width': '250px', 'fontSize': '18px'}
    ),
], style={
    'display': 'flex',
    'gap': '20px',
    'flexWrap': 'wrap',
    'padding': '20px',
    'justifyContent': 'center',
    'alignItems': 'center',
}),
    dcc.Graph(id='3D-chart'),
    html.Div([
        html.Button("download", id="download-data-button", className="btn btn-success mt-3"),
        dcc.Download(id="download-dataframe-csv")
    ], style={'textAlign': 'center', 'padding': '20px'})

], style={'backgroundColor': '#f8f9fa'}) 
    return app

In [7]:







# def update_output(n_clicks, value):
#     if n_clicks == 0:
#         return ""
#     if not value:
#         return "⚠️ لم يتم إدخال أي نص!"
#     return f"✅ تم إدخال النص: {value}" 




def get_another_page_layout():
    try:
        df = pd.read_csv("anime_ed.csv")
    except FileNotFoundError:
    
        print("WARN: 'anime_ed.csv' not found. Using placeholder data.")
        data_placeholder = {
            'name': ['Anime A', 'Anime B', 'Anime C', 'Anime D', 'Anime E', 'Anime F'],
            'genres': ['Action,Adventure', 'Comedy,Slice of Life', 'Action,Drama', 'Sci-Fi,Adventure', 'Comedy,Drama', 'Fantasy,Magic'],
            'rating': [8.5, 7.9, 9.0, 8.2, 7.5, 8.8],
        
        }
        df = pd.DataFrame(data_placeholder)


    df["genres_list"] = df["genre"].fillna("").astype(str).str.split(',')
    df["genres_list"] = df["genres_list"].apply(lambda genre_list: sorted([g.strip() for g in genre_list if g.strip()])) # Sort for consistency


    all_unique_genres = sorted(list(set(g for sublist in df["genres_list"] for g in sublist)))
    for genre_col in all_unique_genres:
        df[genre_col] = df["genres_list"].apply(lambda L: int(genre_col in L))
    app = dbc.Container([
        dbc.Row(dbc.Col(html.H1("Anime Recommendation System", className="text-center text-primary my-4 fw-bold"), width=12)),

        dbc.Card([
            dbc.CardBody([
                dbc.Row([
                    dbc.Col([
                        html.Label("Choose Recommendation Mode:", className="fw-bold"),
                        dcc.Dropdown(
                            id="anime-mode-dropdown",
                            options=[
                                {"label": "Based on Mood/Genre", "value": "mood"},
                                {"label": "Based on Previously Watched Anime", "value": "watched"}
                            ],
                            value="mood",
                            clearable=False
                        )
                    ], md=6),
                    dbc.Col([
                        html.Label("Number of Recommendations:", className="fw-bold"),
                        dcc.Input(
                            id="num-recommendations-input",
                            type="number",
                            value=5,
                            min=1,
                            max=20,
                            step=1,
                            className="form-control" 
                        )
                    ], md=6)
                ], className="mb-3"),
                html.Div(id="anime-sub-selection-area")
            ])
        ], className="mb-4 shadow-sm"),

        dbc.Row(dbc.Col(html.H2("Recommendations", className="mt-4 mb-3 text-secondary"), width=12)),
        dcc.Loading(
            id="loading-results",
            type="default", 
            children=[
                html.Div(id="anime-recommendation-results", className="mt-2")
            ],
            overlay_style={"visibility":"visible", "opacity": 0.2, "backgroundColor": "white"} 
        )
    ], fluid=False, className="py-4") 

    return app

def get_contact_layout(df):

    def generate_data_report(df):
        report_sections = []

        # Null values
        null_counts = df.isnull().sum().reset_index()
        null_counts.columns = ['Column', 'Missing Values']
        report_sections.append(html.H4("Missing Values Per Column:"))
        report_sections.append(dash_table.DataTable(
            data=null_counts.to_dict('records'),
            columns=[{'name': i, 'id': i} for i in null_counts.columns],
            style_table={'overflowX': 'auto'}
        ))
        report_sections.append(html.Hr())

        # Outliers (IQR method)
        outlier_columns = []
        numerical_cols = df.select_dtypes(include=np.number).columns
        if not numerical_cols.empty:
            for col in numerical_cols:
                Q1 = df[col].quantile(0.25)
                Q3 = df[col].quantile(0.75)
                IQR = Q3 - Q1
                lower_bound = Q1 - 1.5 * IQR
                upper_bound = Q3 + 1.5 * IQR
                if ((df[col] < lower_bound) | (df[col] > upper_bound)).any():
                    outlier_columns.append(col)

        report_sections.append(html.H4("Columns with Potential Outliers (IQR method):"))
        if outlier_columns:
            report_sections.append(html.Ul([html.Li(col) for col in outlier_columns]))
        else:
            report_sections.append(html.P("No outliers detected using IQR in numeric columns."))
        report_sections.append(html.Hr())

        # General types
        data_types_general = []
        for col in df.columns:
            dtype = 'Numeric' if pd.api.types.is_numeric_dtype(df[col]) else 'Text/Categorical'
            data_types_general.append({'Column': col, 'General Type': dtype})
        report_sections.append(html.H4("General Data Type per Column:"))
        report_sections.append(dash_table.DataTable(
            data=data_types_general,
            columns=[{'name': 'Column', 'id': 'Column'}, {'name': 'General Type', 'id': 'General Type'}],
            style_table={'overflowX': 'auto'}
        ))
        report_sections.append(html.Hr())

        # Specific types
        data_types_specific = df.dtypes.reset_index()
        data_types_specific.columns = ['Column', 'Specific Type']
        data_types_specific['Specific Type'] = data_types_specific['Specific Type'].astype(str)
        report_sections.append(html.H4("Specific Data Type per Column:"))
        report_sections.append(dash_table.DataTable(
            data=data_types_specific.to_dict('records'),
            columns=[{'name': i, 'id': i} for i in data_types_specific.columns],
            style_table={'overflowX': 'auto'}
        ))
        report_sections.append(html.Hr())

        # Correlation Heatmap
        if not numerical_cols.empty:
            corr_matrix = df[numerical_cols].corr()
            fig = px.imshow(
                corr_matrix,
                text_auto=True,
                aspect="auto",
                title="Correlation Heatmap Between Numeric Columns",
                color_continuous_scale='RdBu',
                zmin=-1,
                zmax=1
            )
            report_sections.append(html.H4("Correlation Heatmap:"))
            report_sections.append(dcc.Graph(figure=fig))

        return html.Div(report_sections)

    # App layout
    app = html.Div([
        html.H2("Fixed Data Report", style={'textAlign': 'center'}),
        generate_data_report(df)
    ])
    return app


In [8]:
common_activation_functions = [
        {'label': 'ReLU', 'value': 'relu'},
        {'label': 'Sigmoid', 'value': 'sigmoid'},
        {'label': 'Tanh', 'value': 'tanh'},
        {'label': 'Softmax', 'value': 'softmax'},
        {'label': 'Linear', 'value': 'linear'},
        {'label': 'Leaky ReLU', 'value': 'leaky_relu'},
        {'label': 'ELU (Exponential Linear Unit)', 'value': 'elu'},
        {'label': 'None (No activation)', 'value': 'none'}
    ]

common_dense_units = [
        {'label': '1 Unit/Class', 'value': 1},
        {'label': '2 Units/Classes', 'value': 2},
        {'label': '3 Units/Classes', 'value': 3},
        {'label': '4 Units/Classes', 'value': 4},
        {'label': '8 Units', 'value': 8},
        {'label': '10 Units/Classes', 'value': 10},
        {'label': '16 Units', 'value': 16},
        {'label': '24 Units', 'value': 24},
        {'label': '32 Units', 'value': 32},
        {'label': '48 Units', 'value': 48},
        {'label': '64 Units', 'value': 64},
        {'label': '96 Units', 'value': 96},
        {'label': '128 Units', 'value': 128},
        {'label': '192 Units', 'value': 192},
        {'label': '256 Units', 'value': 256},
        {'label': '384 Units', 'value': 384},
        {'label': '512 Units', 'value': 512},
        {'label': '768 Units', 'value': 768},
        {'label': '1024 Units', 'value': 1024},
        {'label': '2048 Units', 'value': 2048},
    ]
def get_data_exploration_layout():
    MODEL_DIR = "trained_models"
    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)
    
    global global_df

    # global_df = pd.DataFrame(global_df)

    def train_deep_learning_model(X_train, y_train, X_test, y_test, layer_configs, num_layers):
        print(f"Starting Deep Learning Model training with {num_layers} layers.")
        model_path = None
        accuracy = np.random.uniform(0.7, 0.95) # Simulated accuracy

        time.sleep(5) 
        
        model_filename = "deep_learning_model_placeholder.txt"
        model_path = os.path.join(MODEL_DIR, model_filename)
        try:
            with open(model_path, "w") as f:
                f.write(f"Placeholder for DL model. Configs: {layer_configs}\nSimulated Accuracy: {accuracy:.4f}")
            print(f"Deep Learning Model trained and saved to: {model_path}")
        except Exception as e:
            print(f"Error saving Deep Learning Model: {e}")
            return f"Deep Learning Model trained but failed to save: {e}", None, 0.0
        
        print("Deep Learning Model training completed (simulation).")
        return f"Deep Learning Model trained successfully (simulation). Accuracy: {accuracy:.4f}", model_path, accuracy


    def train_machine_learning_model(X_train, y_train, X_test, y_test, model_name):
        print(f"Starting Machine Learning Model training: {model_name}.")
        model_path = None
        accuracy = np.random.uniform(0.65, 0.90)
        time.sleep(3)

        model_filename = f"{model_name}_model.joblib"
        model_path = os.path.join(MODEL_DIR, model_filename)
        try:
            with open(model_path, "w") as f:
                f.write(f"Placeholder for ML model: {model_name}\nSimulated Accuracy: {accuracy:.4f}")
            print(f"Machine Learning Model {model_name} trained and saved to: {model_path}")
        except Exception as e:
            print(f"Error saving Machine Learning Model {model_name}: {e}")
            return f"Machine Learning Model {model_name} trained but failed to save: {e}", None, 0.0
        
        print(f"Machine Learning Model {model_name} training completed.")
        return f"Machine Learning Model {model_name} trained successfully (simulation). Accuracy: {accuracy:.4f}", model_path, accuracy


    # app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP], suppress_callback_exceptions=True)

    common_activation_functions = [
        {'label': 'ReLU', 'value': 'relu'},
        {'label': 'Sigmoid', 'value': 'sigmoid'},
        {'label': 'Tanh', 'value': 'tanh'},
        {'label': 'Softmax', 'value': 'softmax'},
        {'label': 'Linear', 'value': 'linear'},
        {'label': 'Leaky ReLU', 'value': 'leaky_relu'},
        {'label': 'ELU (Exponential Linear Unit)', 'value': 'elu'},
        {'label': 'None (No activation)', 'value': 'none'}
    ]

    common_dense_units = [
        {'label': '1 Unit/Class', 'value': 1},
        {'label': '2 Units/Classes', 'value': 2},
        {'label': '3 Units/Classes', 'value': 3},
        {'label': '4 Units/Classes', 'value': 4},
        {'label': '8 Units', 'value': 8},
        {'label': '10 Units/Classes', 'value': 10},
        {'label': '16 Units', 'value': 16},
        {'label': '24 Units', 'value': 24},
        {'label': '32 Units', 'value': 32},
        {'label': '48 Units', 'value': 48},
        {'label': '64 Units', 'value': 64},
        {'label': '96 Units', 'value': 96},
        {'label': '128 Units', 'value': 128},
        {'label': '192 Units', 'value': 192},
        {'label': '256 Units', 'value': 256},
        {'label': '384 Units', 'value': 384},
        {'label': '512 Units', 'value': 512},
        {'label': '768 Units', 'value': 768},
        {'label': '1024 Units', 'value': 1024},
        {'label': '2048 Units', 'value': 2048},
    ]

    ml_output_column_options = [{'label': col, 'value': col} for col in global_df.columns]

    app = dbc.Container([
        dcc.Store(id='deep-learning-layer-configs-store'),
        dcc.Store(id='trained-model-path-store', data=None),
        dcc.Download(id="download-model-component"),

        dbc.Row(
            dbc.Col(html.H2("Model Training ", className="text-center bg-light p-2 mb-4"), width=12)
        ),
        dbc.Row([
            dbc.Col([
                html.H4("Deep Learning Model"),
                dbc.Card(
                    dbc.CardBody([
                        dbc.Row([
                            dbc.Col(html.Label("Number of Layers:"), width=6),
                            dbc.Col(dcc.Input(id='dl-num-layers', type='number', min=1, step=1, value=1, className="mb-2"), width=6),
                        ]),
                        dbc.Row([
                            dbc.Col(html.Label("Layer Configuration:"), width="auto", className="me-2 align-self-center"),
                            dbc.Col(dbc.Button("Configure Layers", id="open-layer-config-modal-button", color="info", className="mb-2"), width="auto"),
                        ]),
                        dbc.Row([
                            dbc.Col(html.Label("Target Column (y):"), width=5),
                            dbc.Col(dcc.Dropdown(
                                    id='dl-output-column', 
                                    options=ml_output_column_options,
                                    placeholder="Select target column for DL",
                                    className="mb-2"
                                ), width=7),
                        ])
                    ])
                , className="mb-4")
            ], md=6),
            dbc.Col([
                html.H4("Machine Learning Model"),
                dbc.Card(
                    dbc.CardBody([
                        dbc.Row([
                            dbc.Col(html.Label("Model Name:"), width=5),
                            dbc.Col(dcc.Dropdown(
                                id='ml-model-name',
                                options=[
                                    {'label': 'Linear Regression', 'value': 'linear_regression'},
                                    {'label': 'Logistic Regression', 'value': 'logistic_regression'},
                                    {'label': 'Support Vector Machine (SVM)', 'value': 'svm'},
                                    {'label': 'Random Forest', 'value': 'random_forest'},
                                    {'label': 'Gradient Boosting', 'value': 'gradient_boosting'},
                                ],
                                placeholder="Select a model",
                                className="mb-2"
                            ), width=7),
                        ]),
                        dbc.Row([
                            dbc.Col(html.Label("Target Column (y):"), width=5),
                            dbc.Col(dcc.Dropdown(
                                    id='ml-output-column',
                                    options=ml_output_column_options,
                                    placeholder="Select output column",
                                    className="mb-2"
                                ), width=7),
                        ]),
                    ])
                , className="mb-4")
            ], md=6),
        ]),
        dbc.Row([
            dbc.Col([
                dbc.Checkbox(
                    id="train-dl-checkbox",
                    label="Train Deep Learning Model",
                    value=False,
                    className="me-3"
                ),
                dbc.Checkbox(
                    id="train-ml-checkbox",
                    label="Train Machine Learning Model",
                    value=False, 
                    className="me-3" 
                ),
            ], width="auto", className="text-md-end align-self-center"),
            dbc.Col(
                dbc.Button("Train Selected Models", id='train-button', color="primary", n_clicks=0),
                width="auto", className="text-md-start mt-2 mt-md-0 align-self-center"
            )
        ], justify="center", className="mt-3 mb-3 align-items-center"),

        dbc.Row(
            dbc.Col(
                dcc.Loading(
                    id="loading-output",
                    type="default",
                    children=html.Div(id='output-message', className="mt-3 text-center"),
                ), 
                width=12
            )
        ),
        dbc.Row(
            dbc.Col(
                html.Div([
                    dbc.Button(
                        "Download Trained Model", 
                        id="download-model-button", 
                        color="success", 
                        className="mt-3", 
                        n_clicks=0,
                        style={'display': 'none'} 
                    )
                ], id='download-button-container', className="text-center"),
                width=12
            )
        ),
        dbc.Modal(
            [
                dbc.ModalHeader(dbc.ModalTitle("Configure Deep Learning Layers")),
                dbc.ModalBody(html.Div(id="dynamic-layer-input-container")),
                dbc.ModalFooter(
                    [
                        dbc.Button("Save Configuration", id="save-layer-config-button", color="success", className="ms-auto", n_clicks=0),
                        dbc.Button("Close", id="close-layer-config-modal-button", color="secondary", className="ms-auto", n_clicks=0)
                    ]
                ),
            ],
            id="layer-config-modal",
            is_open=False,
            size="lg",
            backdrop="static",
        ),
    ], fluid=True)

    return app
########################################################################################################################
################################################################################################################################################################################################################################################
########################################################################################################################
########################################################################################################################





In [9]:
def get_preprocessing_layout():
    global global_df
    # global_df=global_df
    app1= html.Div([
        html.Div([
            html.H2("Preprocessing Data", className="text-center my-4"),
            html.Br(),
            dcc.Dropdown(
                options=[
                    {"label": "Remove null values", "value": "remove_null"},
                    {"label": "Normalization", "value": "Normalization"},
                    {"label": "Standardization", "value": "Standardization"},
                    {"label": "Encoding", "value": "Encoding"},
                    {"label": "Remove duplicates", "value": "remove_duplicates"},
                    {"label": "Drop specific feature", "value": "drop_feature"},
                    {"label": "Dimensionality Reduction (PCA)", "value": "pca"},
                ],
                value=[],
                multi=True,
                placeholder="Select preprocessing methods",
                id="preprocessing-dropdown"
            ),
            html.Br(),
            html.Div([
                html.Button("Configure & Run", id="open-modal-button", className="btn btn-primary"),
                html.Button("Download New Data", id="download-new-data-button", className="btn btn-success"), 
            ], className="d-flex justify-content-center gap-3 my-3"),
            dcc.Download(id="download-dataframe-csv"),
        ], className="container"),
        dbc.Modal(
            [
                dbc.ModalHeader(dbc.ModalTitle("Configure Preprocessing Steps")),
                dbc.ModalBody(id="preprocessing-options-modal-body", children=[
                ]),
                dbc.ModalFooter([
                    dbc.Button("Cancel", id="cancel-preprocessing", color="secondary", className="ms-auto"),
                    dbc.Button("Apply Changes", id="apply-preprocessing-button", color="success")
                ]),
            ],
            id="preprocessing-options-modal",
            is_open=False,
            size="lg",
            backdrop="static", 
        ),
        html.Div(className="container mt-4", children=[
            dash_table.DataTable(
                id='data-table',
                columns=[{"name": col, "id": col} for col in global_df.columns],
                data=global_df.to_dict('records'),
                page_size=10,
                style_table={'overflowX': 'auto', 'marginTop': '20px'},
                style_cell={'textAlign': 'center', 'fontFamily': 'sans-serif', 'padding': '8px', 'minWidth': '100px', 'width': '150px', 'maxWidth': '200px'},
                style_header={'backgroundColor': '#f8f9fa', 'fontWeight': 'bold', 'border': '1px solid #dee2e6'},
                style_data={'border': '1px solid #dee2e6'},
                style_data_conditional=[{'if': {'row_index': 'odd'}, 'backgroundColor': '#f2f2f2'}],
                filter_action="native",
                sort_action="native",
                sort_mode="multi",
                page_action="native",
            )
        ]),
        dcc.Store(id='selected-preprocessing-methods-store', data=[]),
        dcc.Store(id='current-data-store', data=global_df.to_dict('records')), 
        dcc.Store(id='current-columns-store', data=[{"name": col, "id": col} for col in global_df.columns]) 
    ])
    return app1 , global_df

In [10]:
import dash
import dash_bootstrap_components as dbc
from dash import html, dcc, Input, Output
import plotly.express as px
import pandas as pd
from dash import callback
from dash import Dash, html, dcc, callback, Input, Output, State, ctx, ALL

external_stylesheets = [
    dbc.themes.BOOTSTRAP,
    "https://smart-radius.com/assets/libs/bootstrap/css/bootstrap.rtl.min.css",
    "https://smart-radius.com/assets/css/styles.css",
    "https://smart-radius.com/assets/css/icons.min.css",
    "https://smart-radius.com/assets/css/font_cairo.css"
]

global global_df 
global_df = pd.DataFrame()
# Main Dash app
app = dash.Dash(__name__,
                external_stylesheets=external_stylesheets,
                suppress_callback_exceptions=True,
                title="Multi-Page & Preprocessing App")

server = app.server

# Sidebar and content styles
SIDEBAR_STYLE = {
    "position": "fixed",
    "top": 0,
    "left": 0,
    "bottom": 0,
    "width": "18rem",
    "padding": "2rem 1rem",
    "background-color": "#f8f9fa",
    "overflow-y": "auto",
}
CONTENT_STYLE = {
    "margin-left": "19rem",
    "margin-right": "1rem",
    "padding": "2rem 1rem",
}

# Sidebar navigation
sidebar = html.Div(
    [
        html.H2("SKY AI", className="display-6 text-center"),
        html.Hr(),
        html.P("Select a page:", className="lead text-center"),
        dbc.Nav(
            [
                dbc.NavLink("Home", href="/", active="exact"),
                dbc.NavLink("Report In Data", href="/contact", active="exact"),
                dbc.NavLink("Data Preprocessing", href="/preprocessing", active="exact"),
                dbc.NavLink(" Model Training", href="/data-exploration", active="exact"),
                dbc.NavLink("Model Test", href="/test", active="exact"),
                dbc.NavLink("Anime Recommendation System", href="/another-page", active="exact"),
            ],
            vertical=True,
            pills=True,
        ),
    ],
    style=SIDEBAR_STYLE,
)

# Main layout
app.layout = html.Div([
    dcc.Location(id="url", refresh=False),
    sidebar,
    html.Div(id="page-content", style=CONTENT_STYLE)
])


# Callbacks for multi-page navigation
@app.callback(
    Output("page-content", "children"),
    [Input("url", "pathname")]
)
def display_page(pathname):
    global global_df
    if pathname == "/":
        return get_home_layout()
    elif pathname == "/data-exploration":
        return get_data_exploration_layout()
    elif pathname == "/preprocessing":
        
        app11 , global_df =get_preprocessing_layout()
        return app11
    elif pathname == "/another-page":
        return get_another_page_layout()
    elif pathname == "/contact":
        return get_contact_layout(global_df)
    elif pathname == "/test":
        return test()
    else:
        return dbc.Container([
            html.H1("404: Page Not Found", className="text-danger"),
            html.Hr(),
            html.P(f"The pathname '{pathname}' was not recognised..."),
            html.P("Please select a page from the menu."),
        ], className="py-5 text-center")

@app.callback(
    Output('output', 'children'),
    Input('print-button', 'n_clicks'),
    Input('text-input', 'value')
)
def update_output(n_clicks, value):
    if n_clicks == 0:
        return ""
    if not value:
        return "⚠️ "
    return f"✅  {value}" 
# --- Callback to Reset Data  ---
@callback(
    Output('current-data-store', 'data', allow_duplicate=True),
    Output('current-columns-store', 'data', allow_duplicate=True),
    Output('data-table', 'data', allow_duplicate=True),
    Output('data-table', 'columns', allow_duplicate=True),
    Output('preprocessing-dropdown', 'value', allow_duplicate=True), # Added allow_duplicate=True
    Input('reset-data-button', 'n_clicks'), # Keep this line IF you intend to keep the old reset button ID somewhere else or reuse it
    prevent_initial_call=True
)
def reset_data(n_clicks):
    # Check if the trigger was indeed the reset button (if you keep the input)
    # triggered_id = ctx.triggered_id
    # if n_clicks and triggered_id == 'reset-data-button':
    if n_clicks: # If you keep the original Input for reset-data-button
        print("Resetting data to original state.")
        columns = [{"name": col, "id": col} for col in global_df.columns]
        data = global_df.to_dict('records')
        # Reset stored data, table data, table columns, and dropdown
        return data, columns, data, columns, []
    # If triggered by something else or n_clicks is None/0
    return no_update, no_update, no_update, no_update, no_update


# --- Callback to Open Modal and Generate Options ---
@callback(
    Output("preprocessing-options-modal", "is_open"),
    Output("preprocessing-options-modal-body", "children"),
    Output('selected-preprocessing-methods-store', 'data'), # Store selected methods
    Input("open-modal-button", "n_clicks"),
    Input("cancel-preprocessing", "n_clicks"),
    State("preprocessing-dropdown", "value"), # Get methods selected in dropdown
    State("preprocessing-options-modal", "is_open"),
    State("current-columns-store", "data"), # Get current columns
    prevent_initial_call=True,
)
def toggle_modal_and_generate_options(n_open, n_cancel, selected_methods, is_open, current_columns_list):
    triggered_id = ctx.triggered_id
    print(f"Modal Triggered ID: {triggered_id}")

    if triggered_id == "open-modal-button" and selected_methods:
        modal_content = []
        # Ensure current_columns_list is not None and is a list of dicts
        if current_columns_list and isinstance(current_columns_list, list):
             current_columns = [col['id'] for col in current_columns_list if isinstance(col, dict) and 'id' in col]
        else:
             print("Warning: current_columns_list is invalid or empty.")
             current_columns = [] # Fallback to empty list

        if not current_columns:
             # Attempt to get columns from global_df as a last resort if store is empty/invalid
             current_columns = global_df.columns.tolist()
             print("Warning: Using columns from global_df as fallback.")


        for method in selected_methods:
            # Ensure method is a string before using replace
            if not isinstance(method, str):
                 print(f"Warning: Skipping invalid method value: {method}")
                 continue

            method_div = [html.H5(f"Options for: {method.replace('_', ' ').title()}", className="mt-3"), html.Hr()]

            # --- Column Selector for the method ---
            method_div.append(html.Label(f"Select columns to apply '{method.replace('_', ' ').title()}' on:"))
            method_div.append(
                dcc.Dropdown(
                    options=[{"label": c, "value": c} for c in current_columns],
                    value=[], # Start with no columns selected
                    multi=True,
                    id={'type': 'method-column-selector', 'index': method}
                )
            )
            method_div.append(html.Br()) # Add some space

            # --- Specific Options based on method ---
            if method == "Normalization":
                method_div.append(html.Label("Normalization Type:"))
                method_div.append(dcc.RadioItems(
                    options=[
                        {'label': ' Min-Max (Scales to [0, 1])', 'value': 'min_max'},
                        {'label': ' Z-Score (Standard Scaler)', 'value': 'z_score'} # Note: Z-score *is* Standardization
                    ],
                    value='min_max', # Default value
                    id={'type': 'method-option', 'index': f"{method}-type"},
                    inline=True, # Use inline or list based on preference
                    # Consider using dbc.RadioItems for better Bootstrap integration
                ))
            elif method == "Encoding":
                method_div.append(html.Label("Encoding Type:"))
                method_div.append(dcc.RadioItems(
                    options=[
                         {'label': ' One-Hot Encoding', 'value': 'one_hot'},
                         {'label': ' Label Encoding', 'value': 'label'}
                    ],
                     value='one_hot', # Default value
                     id={'type': 'method-option', 'index': f"{method}-type"},
                     inline=True
                ))
            elif method == "drop_feature":
                 # Modify the label slightly for clarity
                 method_div[1] = html.Label(f"Select columns (features) to drop:") # Modify the label generated above
            elif method == "pca":
                method_div.append(html.Label("Number of Components (<= number of selected numeric columns):"))
                method_div.append(dbc.Input( # Use dbc.Input for better styling
                     type="number",
                     min=1,
                     step=1,
                     placeholder="Enter number of components",
                     id={'type': 'method-option', 'index': f"{method}-components"}
                ))

            modal_content.append(dbc.Card(dbc.CardBody(method_div), className="mb-3"))

        if not modal_content:
             modal_content = [html.P("Please select at least one preprocessing method from the dropdown.")]
             return False, modal_content, no_update
        return True, modal_content, selected_methods

    elif triggered_id == "cancel-preprocessing":
         print("Cancel button clicked.")
         return False, no_update, no_update
    elif triggered_id == "open-modal-button" and not selected_methods:
        return False, html.P("Please select preprocessing methods first.", className="text-danger"), []
    return is_open, no_update, no_update


# --- Callback for Applying Preprocessing Steps ---
@callback(
    Output('current-data-store', 'data', allow_duplicate=True),    
    Output('current-columns-store', 'data', allow_duplicate=True), 
    Output('data-table', 'data', allow_duplicate=True),            
    Output('data-table', 'columns', allow_duplicate=True),         
    Output("preprocessing-options-modal", "is_open", allow_duplicate=True), 
    Output("preprocessing-options-modal-body", "children", allow_duplicate=True), 
    Input("apply-preprocessing-button", "n_clicks"),
    State('selected-preprocessing-methods-store', 'data'),         
    State({'type': 'method-column-selector', 'index': ALL}, 'value'),
    State({'type': 'method-column-selector', 'index': ALL}, 'id'),    
    State({'type': 'method-option', 'index': ALL}, 'value'),          
    State({'type': 'method-option', 'index': ALL}, 'id'),            
    State('current-data-store', 'data'),                            
    prevent_initial_call=True,
    allow_duplicate=True,
)
def apply_preprocessing_steps(n_clicks,
                              selected_methods,
                              column_selector_values, column_selector_ids,
                              option_values, option_ids,
                              current_data_records):
    if n_clicks is None or not selected_methods:
        print("Apply button not clicked or no methods selected in store.")
        return no_update, no_update, no_update, no_update, False, []

    print("\n--- Applying Preprocessing ---")
    print(f"Methods selected in store: {selected_methods}")
    if not current_data_records:
         print("Error: No current data found in store. Cannot apply preprocessing.")
         return no_update, no_update, no_update, no_update, False, html.P("Error: Data store is empty.", className="text-danger")
    processed_df = pd.DataFrame.from_records(current_data_records)
    method_columns = {}
    if column_selector_ids and column_selector_values:
         method_columns = {comp_id['index']: value for comp_id, value in zip(column_selector_ids, column_selector_values) if isinstance(comp_id, dict) and 'index' in comp_id}

    method_options = {}
    if option_ids and option_values:
        method_options = {comp_id['index']: value for comp_id, value in zip(option_ids, option_values) if isinstance(comp_id, dict) and 'index' in comp_id}
    print(f"Columns selected per method: {method_columns}")
    print(f"Options selected per method: {method_options}")
    errors = [] 
    for method in selected_methods:
        if not isinstance(method, str):
             print(f"Warning: Skipping invalid method value from store: {method}")
             continue
        print(f"\nProcessing: {method}")
        columns_to_process = method_columns.get(method, [])
        if columns_to_process is None:
             columns_to_process = []
        elif not isinstance(columns_to_process, list):
             columns_to_process = [columns_to_process] 
        valid_columns_to_process = [col for col in columns_to_process if col in processed_df.columns]
        if len(valid_columns_to_process) != len(columns_to_process):
            print(f"  - Warning: Some selected columns for '{method}' do not exist in the current dataframe. Processing only valid columns: {valid_columns_to_process}")
        columns_to_process = valid_columns_to_process 


        if not columns_to_process and method not in ['remove_null', 'remove_duplicates']:
             print(f"  - Skipping '{method}': No valid columns selected or available for processing.")
             continue 
        try:
            if method == "remove_null":
                subset_to_check = columns_to_process if columns_to_process else None #
                print(f"  - Removing rows with nulls in columns: {'all' if subset_to_check is None else subset_to_check}")
                initial_rows = len(processed_df)
                processed_df.dropna(subset=subset_to_check, inplace=True)
                print(f"  - Rows removed: {initial_rows - len(processed_df)}")
            elif method == "remove_duplicates":
                subset_to_check = columns_to_process if columns_to_process else None 
                print(f"  - Removing duplicate rows based on columns: {'all' if subset_to_check is None else subset_to_check}")
                initial_rows = len(processed_df)
                processed_df.drop_duplicates(subset=subset_to_check, inplace=True)
                print(f"  - Rows removed: {initial_rows - len(processed_df)}")
            elif method == "Normalization":
                norm_type = method_options.get(f"{method}-type", "min_max") 
                print(f"  - Applying {norm_type} normalization to columns: {columns_to_process}")
                numeric_cols = processed_df[columns_to_process].select_dtypes(include=np.number).columns.tolist()
                if not numeric_cols:
                    print("  - Warning: No numeric columns selected or valid for Normalization.")
                    continue
                print(f"    - Numeric columns being processed: {numeric_cols}")
                if norm_type == "min_max":
                    scaler = MinMaxScaler()
                    processed_df[numeric_cols] = scaler.fit_transform(processed_df[numeric_cols])
                elif norm_type == "z_score": 
                    scaler = StandardScaler()
                    processed_df[numeric_cols] = scaler.fit_transform(processed_df[numeric_cols])
            elif method == "Standardization":
                 print(f"  - Applying Standardization (Z-Score) to columns: {columns_to_process}")
                 numeric_cols = processed_df[columns_to_process].select_dtypes(include=np.number).columns.tolist()
                 if not numeric_cols:
                    print("  - Warning: No numeric columns selected or valid for Standardization.")
                    continue
                 print(f"    - Numeric columns being processed: {numeric_cols}")
                 scaler = StandardScaler()
                 processed_df[numeric_cols] = scaler.fit_transform(processed_df[numeric_cols])
            elif method == "Encoding":
                encoding_type = method_options.get(f"{method}-type", "one_hot") 
                print(f"  - Applying {encoding_type} encoding to columns: {columns_to_process}")
                categorical_cols = processed_df[columns_to_process].select_dtypes(include=['object', 'category']).columns.tolist()
                if not categorical_cols:
                     print("  - Warning: No categorical columns (object/category) selected or valid for Encoding.")
                     continue
                print(f"    - Categorical columns being processed: {categorical_cols}")

                if encoding_type == "label":
                    for col in categorical_cols:
                        le = LabelEncoder()
                        processed_df[col] = le.fit_transform(processed_df[col].astype(str))
                        print(f"      - Applied Label Encoding to '{col}'")
                elif encoding_type == "one_hot":
                    cols_to_encode = [col for col in categorical_cols if col in processed_df.columns]
                    if cols_to_encode:
                        try:
                            processed_df = pd.get_dummies(processed_df, columns=cols_to_encode, prefix=cols_to_encode, prefix_sep='_ohe_') 
                            print(f"      - Applied One-Hot Encoding, dropped original columns: {cols_to_encode}")
                        except Exception as e_ohe:
                             error_msg = f"Error during One-Hot Encoding for columns '{cols_to_encode}': {e_ohe}"
                             print(f"  - !!! {error_msg}")
                             errors.append(error_msg)
                    else:
                        print(f"      - Columns specified for One-Hot Encoding not found or already processed: {categorical_cols}")
            elif method == "drop_feature":
                print(f"  - Dropping features (columns): {columns_to_process}")
                if columns_to_process: 
                    processed_df.drop(columns=columns_to_process, inplace=True)
                    print(f"    - Dropped: {columns_to_process}")
                else:
                    print(f"    - No valid columns specified or found for dropping.")
            elif method == "pca":
                n_components_str = method_options.get(f"{method}-components")
                print(f"  - Applying PCA to columns: {columns_to_process}")
                numeric_cols = processed_df[columns_to_process].select_dtypes(include=np.number).columns.tolist()
                if not numeric_cols:
                    print("  - Warning: No numeric columns selected or valid for PCA.")
                    continue
                if not n_components_str:
                     print("  - Skipping PCA: Number of components not specified.")
                     errors.append(f"PCA skipped for columns '{columns_to_process}': Number of components not specified.")
                     continue
                try:
                    n_components = int(n_components_str) 
                except (ValueError, TypeError):
                    print(f"  - Skipping PCA: Invalid number of components specified: '{n_components_str}'. Must be an integer.")
                    errors.append(f"PCA skipped for columns '{columns_to_process}': Invalid number of components '{n_components_str}'.")
                    continue
                if n_components <= 0:
                    print("  - Skipping PCA: Number of components must be positive.")
                    errors.append(f"PCA skipped for columns '{columns_to_process}': Number of components ({n_components}) must be positive.")
                    continue
                if n_components > len(numeric_cols):
                    print(f"  - Warning: Number of components ({n_components}) is greater than the number of selected numeric features ({len(numeric_cols)}). Setting components to {len(numeric_cols)}.")
                    n_components = len(numeric_cols) 
                if n_components == 0:
                     print("  - Skipping PCA: No valid numeric features available after validation.")
                     continue
                print(f"    - Numeric columns being processed: {numeric_cols}")
                print(f"    - Number of components: {n_components}")
                pca_data = processed_df[numeric_cols].copy()
                if pca_data.isnull().values.any():
                    print("    - Warning: NaN values found in PCA columns. Filling with column mean.")
                    for col in numeric_cols:
                        if pca_data[col].isnull().any():
                            pca_data[col].fillna(pca_data[col].mean(), inplace=True)
                pca_data = pca_data.astype(float)
                pca = PCA(n_components=n_components)
                pca_result = pca.fit_transform(pca_data)
                pca_cols = [f"PCA_{i+1}" for i in range(n_components)]
                pca_df = pd.DataFrame(pca_result, columns=pca_cols, index=pca_data.index)
                cols_to_drop_pca = [col for col in numeric_cols if col in processed_df.columns]
                processed_df.drop(columns=cols_to_drop_pca, inplace=True)
                processed_df = pd.concat([processed_df, pca_df], axis=1)
                print(f"    - Dropped original columns: {cols_to_drop_pca}")
                print(f"    - Added PCA columns: {pca_cols}")
        except Exception as e:
             error_msg = f"Error processing method '{method}' on columns '{columns_to_process}': {e}"
             print(f"  - !!! {error_msg}")
             import traceback
             traceback.print_exc()
             errors.append(error_msg)
    print("\n--- Preprocessing Complete ---")
    modal_feedback = [] 
    if errors:
        print("Errors occurred during processing:")
        error_items = [html.Li(err) for err in errors]
        modal_feedback = [html.Div([
             html.H5("Processing Errors Occurred:", className="text-danger"),
             html.Ul(error_items)
             ], className="alert alert-danger")] 
        for err in errors:
            print(f"- {err}")
    final_columns = [{"name": col, "id": col} for col in processed_df.columns]
    final_data = processed_df.to_dict('records')
    global global_df
    global_df = processed_df.copy()
    return final_data, final_columns, final_data, final_columns, False, [] 
    



###################################################################################################model coal backe
@app.callback(
    Output("layer-config-modal", "is_open"),
    Output("dynamic-layer-input-container", "children"),
    [Input("open-layer-config-modal-button", "n_clicks"),
     Input("close-layer-config-modal-button", "n_clicks"),
     Input("save-layer-config-button", "n_clicks")],
    [State("layer-config-modal", "is_open"),
     State("dl-num-layers", "value"),
     State("deep-learning-layer-configs-store", "data")]
)
def toggle_modal_and_generate_inputs(n_open, n_close, n_save, is_open, num_layers, stored_configs):
    ctx = dash.callback_context
    triggered_id = ctx.triggered[0]['prop_id'].split('.')[0] if ctx.triggered else None

    if not isinstance(num_layers, int) or num_layers < 1:
        num_layers = 1 

    new_children = []
    if triggered_id == "open-layer-config-modal-button":
        current_configs = stored_configs if stored_configs else []
        if len(current_configs) < num_layers:
            current_configs.extend([{} for _ in range(num_layers - len(current_configs))])
        elif len(current_configs) > num_layers:
            current_configs = current_configs[:num_layers]

        for i in range(num_layers):
            layer_conf = current_configs[i] if i < len(current_configs) else {}
            units_value = layer_conf.get('units')
            activation_value = layer_conf.get('activation')
            
            units_label_text = f"Layer {i+1} Units (Neurons):"
            if num_layers == 1: 
                 units_label_text = "Units (e.g., Input Features if first, Output Classes if last):"
            elif i == 0: 
                 units_label_text = "Layer 1 Units (Input Shape/Features):"
            elif i == num_layers - 1: 
                 units_label_text = f"Layer {num_layers} Units (Number of Output Classes/Values):"

            layer_input_row = dbc.Row([
                dbc.Col(html.H6(f"Layer {i+1} Configuration:"), width=12, className="mt-3 mb-1 text-primary"),
                dbc.Col([
                    html.Label(units_label_text),
                    dcc.Dropdown(
                        id={'type': 'dynamic-layer-units', 'index': i},
                        options=common_dense_units,
                        value=units_value,
                        placeholder="Select units/classes for layer"
                    )
                ], md=6),
                dbc.Col([
                    html.Label("Activation Function:"),
                    dcc.Dropdown(
                        id={'type': 'dynamic-layer-activation', 'index': i},
                        options=common_activation_functions,
                        value=activation_value,
                        placeholder="Select activation for layer"
                    )
                ], md=6)
            ], className="mb-3 align-items-center border-bottom pb-3")
            new_children.append(layer_input_row)
        return True, new_children

    if triggered_id in ["close-layer-config-modal-button", "save-layer-config-button"]:
        return False, dash.no_update 

    return is_open, dash.no_update

@app.callback(
    Output("deep-learning-layer-configs-store", "data"),
    Input("save-layer-config-button", "n_clicks"),
    [State({'type': 'dynamic-layer-units', 'index': ALL}, 'value'),
     State({'type': 'dynamic-layer-activation', 'index': ALL}, 'value'),
     State("dl-num-layers", "value")],
    prevent_initial_call=True
)
def save_layer_configurations(n_clicks_save, layer_units_values, layer_activation_values, num_layers):
    if n_clicks_save == 0:
        return dash.no_update

    configurations = []
    if num_layers and isinstance(num_layers, int) and num_layers > 0:
        safe_units_values = layer_units_values if layer_units_values is not None else []
        safe_activation_values = layer_activation_values if layer_activation_values is not None else []

        for i in range(num_layers):
            units = safe_units_values[i] if i < len(safe_units_values) else None
            activation = safe_activation_values[i] if i < len(safe_activation_values) else None
            configurations.append({'layer': i + 1, 'units': units, 'activation': activation})
    return configurations

@app.callback(
    [Output('output-message', 'children'),
     Output('download-model-button', 'style'),
     Output('trained-model-path-store', 'data')],
    [Input('train-button', 'n_clicks')],
    [State('dl-num-layers', 'value'),
     State('deep-learning-layer-configs-store', 'data'),
     State('dl-output-column', 'value'),
     State('ml-model-name', 'value'),
     State('ml-output-column', 'value'),
     State('train-dl-checkbox', 'value'),
     State('train-ml-checkbox', 'value')]
)
def handle_train_model(n_clicks, num_dl_layers, dl_layer_configs, dl_output_col, ml_model_name, ml_output_col, train_dl_selected, train_ml_selected):
    default_download_style = {'display': 'none'}
    no_model_path_stored = None

    if n_clicks is None or n_clicks == 0:
        return "Select models to train and click 'Train Selected Models' to begin.", default_download_style, no_model_path_stored

    ctx = dash.callback_context
    if not ctx.triggered:
        return "Select models to train and click 'Train Selected Models' to begin.", default_download_style, no_model_path_stored
        
    output_messages = []
    latest_trained_model_path = None
    download_button_style = default_download_style.copy()
    test_size = 0.2 

    X_dl_train, X_dl_test, y_dl_train, y_dl_test = None, None, None, None
    if train_dl_selected and dl_output_col:
        if dl_output_col in global_df.columns:
            y_dl = global_df[dl_output_col]
            X_dl = global_df.drop(columns=[dl_output_col])
            if len(X_dl) > 1 : # Need at least 2 samples for train_test_split
                 X_dl_train, X_dl_test, y_dl_train, y_dl_test = train_test_split(X_dl, y_dl, test_size=test_size, random_state=42)
            else:
                output_messages.append(html.Div(f"Warning: Not enough data for DL model to split into training/testing sets (Features: {X_dl.shape[0]}). Using all data for training.", className="text-warning"))
                X_dl_train, y_dl_train = X_dl, y_dl # Use all data if too small
                X_dl_test, y_dl_test = X_dl, y_dl # Or handle this case appropriately
        else:
            output_messages.append(html.Div(f"Error: DL Target column '{dl_output_col}' not found in data.", className="text-danger"))
            train_dl_selected = False

    X_ml_train, X_ml_test, y_ml_train, y_ml_test = None, None, None, None
    if train_ml_selected and ml_output_col:
        if ml_output_col in global_df.columns:
            y_ml = global_df[ml_output_col]
            X_ml = global_df.drop(columns=[ml_output_col])
            if len(X_ml) > 1:
                X_ml_train, X_ml_test, y_ml_train, y_ml_test = train_test_split(X_ml, y_ml, test_size=test_size, random_state=42)
            else:
                output_messages.append(html.Div(f"Warning: Not enough data for ML model to split into training/testing sets (Features: {X_ml.shape[0]}). Using all data for training.", className="text-warning"))
                X_ml_train, y_ml_train = X_ml, y_ml
                X_ml_test, y_ml_test = X_ml, y_ml 
        else:
            output_messages.append(html.Div(f"Error: ML Target column '{ml_output_col}' not found in data.", className="text-danger"))
            train_ml_selected = False

    if not train_dl_selected and not train_ml_selected:
        if not output_messages:
            output_messages.append(html.P("No model type selected for training, or target column missing for selected models."))
        return html.Div(output_messages), default_download_style, no_model_path_stored

    if train_dl_selected:
        if X_dl_train is not None and y_dl_train is not None and X_dl_test is not None and y_dl_test is not None :
            if not num_dl_layers or not dl_layer_configs:
                output_messages.append(html.Div("Deep Learning Model: Configuration missing. Training skipped.", className="text-warning"))
            else:
                valid_configs = all(conf.get('units') is not None and conf.get('activation') is not None for conf in dl_layer_configs)
                if valid_configs:
                    output_messages.append(html.P("Starting Deep Learning Model training..."))
                    try:
                        dl_result_msg, dl_model_path, dl_accuracy = train_deep_learning_model(X_dl_train, y_dl_train, X_dl_test, y_dl_test, dl_layer_configs, num_dl_layers)
                        output_messages.append(html.Div(dl_result_msg, className="text-success"))
                        # output_messages.append(html.P(f"Deep Learning Model Test Accuracy: {dl_accuracy:.4f}"))
                        
                        if dl_model_path:
                            latest_trained_model_path = dl_model_path
                            download_button_style = {'display': 'block', 'margin': 'auto'}
                            dl_config_str_parts = [f"Deep Learning Model - Total Layers: {num_dl_layers}"]
                            dl_config_str_parts.append("Layer-wise Configurations:")
                            for config in dl_layer_configs:
                                units_info = f"Units/Classes: {config['units']}"
                                activation_info = f"Activation: {config['activation']}"
                                dl_config_str_parts.append(f"  Layer {config['layer']}: {units_info}, {activation_info}")
                            output_messages.append(html.Pre("\n".join(dl_config_str_parts)))
                        else:
                            output_messages.append(html.Div("DL Model trained, but path not returned from training function.", className="text-warning"))
                    except Exception as e:
                        output_messages.append(html.Div(f"Error during DL training: {str(e)}", className="text-danger"))
                else:
                     output_messages.append(html.Div("Deep Learning Model: Training skipped due to incomplete layer configurations.", className="text-warning"))
        else:
            output_messages.append(html.Div("Deep Learning Model: Training data (X or y) not prepared or insufficient for split.", className="text-warning"))

    if train_dl_selected and train_ml_selected: 
         output_messages.append(html.Hr())

    if train_ml_selected:
        if X_ml_train is not None and y_ml_train is not None and X_ml_test is not None and y_ml_test is not None:
            if not ml_model_name:
                output_messages.append(html.Div("Machine Learning Model: Model name not selected. Training skipped.", className="text-warning"))
            else:
                output_messages.append(html.P(f"Starting Machine Learning Model training ({ml_model_name})..."))
                try:
                    ml_result_msg, ml_model_path, ml_accuracy = train_machine_learning_model(X_ml_train, y_ml_train, X_ml_test, y_ml_test, ml_model_name)
                    output_messages.append(html.Div(ml_result_msg, className="text-success"))
                    # output_messages.append(html.P(f"Machine Learning Model ({ml_model_name}) Test Accuracy: {ml_accuracy:.4f}"))

                    if ml_model_path:
                        latest_trained_model_path = ml_model_path 
                        download_button_style = {'display': 'block', 'margin': 'auto'} 
                        output_messages.append(html.Pre(f"Machine Learning Model: {ml_model_name}\nOutput Column: {ml_output_col}"))
                    else:
                        output_messages.append(html.Div(f"ML Model {ml_model_name} trained, but path not returned.", className="text-warning"))
                except Exception as e:
                    output_messages.append(html.Div(f"Error during ML training ({ml_model_name}): {str(e)}", className="text-danger"))
        else:
            output_messages.append(html.Div("Machine Learning Model: Training data (X or y) not prepared or insufficient for split.", className="text-warning"))

    if not output_messages:
         output_messages.append(html.P("No models were selected or configured for training."))

    return html.Div(output_messages), download_button_style, latest_trained_model_path


@app.callback(
    Output("download-model-component", "data"),
    Input("download-model-button", "n_clicks"),
    State("trained-model-path-store", "data"),
    prevent_initial_call=True,
)
def download_trained_model(n_clicks, model_path):
    if n_clicks > 0 and model_path:
        if os.path.exists(model_path):
            return dcc.send_file(model_path)
        else:
            print(f"Error: Model file not found at {model_path}")
            # Return a small text file indicating the error to the user for download
            error_content = f"Error: The requested model file was not found on the server at path: {model_path}"
            return dict(content=error_content, filename="model_download_error.txt")
    return dash.no_update



#############################################################################animy
try:
    df = pd.read_csv("anime_ed.csv")
except FileNotFoundError:
  
    print("WARN: 'anime_ed.csv' not found. Using placeholder data.")
    data_placeholder = {
        'name': ['Anime A', 'Anime B', 'Anime C', 'Anime D', 'Anime E', 'Anime F'],
        'genres': ['Action,Adventure', 'Comedy,Slice of Life', 'Action,Drama', 'Sci-Fi,Adventure', 'Comedy,Drama', 'Fantasy,Magic'],
        'rating': [8.5, 7.9, 9.0, 8.2, 7.5, 8.8],
      
    }
    df = pd.DataFrame(data_placeholder)


df["genres_list"] = df["genre"].fillna("").astype(str).str.split(',')
df["genres_list"] = df["genres_list"].apply(lambda genre_list: sorted([g.strip() for g in genre_list if g.strip()])) # Sort for consistency


all_unique_genres = sorted(list(set(g for sublist in df["genres_list"] for g in sublist)))
for genre_col in all_unique_genres:
    df[genre_col] = df["genres_list"].apply(lambda L: int(genre_col in L))

@app.callback(
    Output("anime-sub-selection-area", "children"),
    Input("anime-mode-dropdown", "value")
)
def render_sub_selection(mode):
    if mode == "mood":
        mood_options_for_dropdown = [{"label": genre, "value": genre} for genre in all_unique_genres]
        return html.Div([
            html.Label("Select Mood(s)/Genre(s):", className="fw-bold mt-3"),
            dcc.Dropdown(
                id="anime-mood-dropdown",
                options=mood_options_for_dropdown,
                multi=True,
                placeholder="Choose your current mood(s) or desired genre(s)..."
            ),
            dbc.Button(
                html.Span([html.I(className="fas fa-magic me-2"), "Get Mood Recommendations"]),
                id="recommend-mood-button", color="success", className="mt-3 w-100"
            )
        ])
    elif mode == "watched":
        anime_names = sorted(df["name"].dropna().unique())
        return html.Div([
            html.Label("Select Anime You Watched:", className="fw-bold mt-3"),
            dcc.Dropdown(
                id="anime-watched-dropdown",
                options=[{"label": name, "value": name} for name in anime_names],
                placeholder="Search your previously watched anime..."
            ),
            dbc.Button(
                html.Span([html.I(className="fas fa-film me-2"), "Get Similar Recommendations"]),
                id="recommend-watched-button", color="info", className="mt-3 w-100"
            )
        ])
    return html.Div()


def create_anime_card(anime_series, index):
    return dbc.Col(dbc.Card([
        dbc.CardBody([
            html.H5(anime_series['name'], className="card-title"),
            html.P(f"Rating: {anime_series.get('rating', 'N/A')}", className="card-text"),
            html.P(
                "Genres: " + ", ".join(anime_series.get('genres_list', [])),
                className="card-text small text-muted"
            )
        ])
    ], className="h-100 shadow-sm mb-3"), md=4, lg=3, className="mb-4") 
@app.callback(
    Output("anime-recommendation-results", "children"),
    Input("recommend-mood-button", "n_clicks"),
    State("anime-mood-dropdown", "value"),
    State("num-recommendations-input", "value"),
    prevent_initial_call=True
)
def recommend_by_mood(n_clicks, selected_moods, num_to_recommend):
    if not n_clicks: # No button click yet
        return ""
    if not selected_moods:
        return dbc.Alert("Please select at least one mood/genre.", color="warning", className="mt-3")

    df_copy = df.copy() # Work with a copy

    df_copy['Dot'] = df_copy[selected_moods].sum(axis=1)

    df_sorted = df_copy.sort_values(by=['Dot', 'rating'], ascending=[False, False])

    top_animes = df_sorted[df_sorted['Dot'] > 0].head(num_to_recommend)

    if top_animes.empty:
        return dbc.Alert("No anime found matching the selected mood(s)/genre(s). Try a different combination!", color="info", className="mt-3")

    cards = [create_anime_card(row, idx) for idx, row in top_animes.iterrows()]
    return dbc.Row(cards)

@app.callback(
    Output("anime-recommendation-results", "children", allow_duplicate=True), 
    Input("recommend-watched-button", "n_clicks"),
    State("anime-watched-dropdown", "value"),
    State("num-recommendations-input", "value"),
    prevent_initial_call=True
)
def recommend_by_watched(n_clicks, watched_anime_name, num_to_recommend):
    if not n_clicks: 
        return ""
    if not watched_anime_name:
        return dbc.Alert("Please select an anime you have watched.", color="warning", className="mt-3")

    watched_row_df = df[df["name"] == watched_anime_name]
    if watched_row_df.empty:
        return dbc.Alert(f"Anime '{watched_anime_name}' not found in the database.", color="danger", className="mt-3")

    watched_genres_set = set(watched_row_df.iloc[0]["genres_list"])

    if not watched_genres_set:
        return dbc.Alert(f"No genre information available for '{watched_anime_name}' to find similar anime.", color="info", className="mt-3")

    recommendations_df = df[df["name"] != watched_anime_name].copy()

    def count_common_genres(genre_list_to_compare):
        return len(watched_genres_set.intersection(set(genre_list_to_compare)))

    recommendations_df["common_genres_count"] = recommendations_df["genres_list"].apply(count_common_genres)

    # Filter out anime with no common genres and sort
    # Sort by common_genres_count (desc), then rating (desc)
    recommendations_df = recommendations_df[recommendations_df["common_genres_count"] > 0]
    recommendations_df = recommendations_df.sort_values(
        by=["common_genres_count", "rating"],
        ascending=[False, False]
    ).head(num_to_recommend)

    if recommendations_df.empty:
        return dbc.Alert(f"Could not find any similar anime recommendations for '{watched_anime_name}'.", color="info", className="mt-3")

    cards = [create_anime_card(row, idx) for idx, row in recommendations_df.iterrows()]
    return dbc.Row(cards)
##########################################################################################################plot
# --- Callbacks ---

from dash import ctx  # لو مش مضافة

@app.callback(
    Output('data-store', 'data'),
    Output('x-axis-dropdown', 'options'),
    Output('x-axis-dropdown', 'value'),
    Output('x-axis-dropdown1', 'options'),
    Output('y-axis-dropdown1', 'options'),
    Output('z-axis-dropdown1', 'options'),
    Output('color-dropdown', 'options'),
    Output('scatter-chart', 'figure'),
    Input('upload-data', 'contents'),
    State('upload-data', 'filename'),
    prevent_initial_call=True
)
def update_on_upload(contents, filename):
    if contents is None:
        return no_update, [], None, [], [], [], [], go.Figure()

    content_type, content_string = contents.split(',')
    decoded = base64.b64decode(content_string)
    try:
        if 'csv' in filename:
            global global_df
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
            global_df=df
        elif 'xls' in filename or 'xlsx' in filename: # دعم ملفات اكسل
            df = pd.read_excel(io.BytesIO(decoded))
            global_df=df
        else:
            print("Unsupported file type")
            return no_update, [], None, [], [], [], [], go.Figure().update_layout(title="Unsupported File Type")

        options = [{"label": col, "value": col} for col in df.columns]
        initial_fig = go.Figure()
        initial_x_col = None

        if not df.empty and len(df.columns) > 0:
            initial_x_col = df.columns[0]
            initial_fig = plot_distribution_of_clas_in_data(df, initial_x_col)
        
        # تخزين البيانات كـ JSON string
        stored_data = df.to_json(date_format='iso', orient='split')

        return stored_data, options, initial_x_col, options, options, options, options, initial_fig

    except Exception as e:
        print(f"Error processing file: {e}")
        return no_update, [], None, [], [], [], [], go.Figure().update_layout(title=f"Error Processing File: {e}")

# Callback لتحديث الرسم البياني التوزيعي
@app.callback(
    Output('scatter-chart', 'figure',allow_duplicate=True), # allow_duplicate لتجنب تعارض المخرجات
    Input('x-axis-dropdown', 'value'),
    Input('data-store', 'data'), # استخدام Input بدلا من State ليتم التحديث عند تغير البيانات أيضا
    prevent_initial_call=True
)
def update_distribution_plot(selected_col, stored_data):
    if not selected_col or not stored_data:
        return go.Figure().update_layout(title="Please select a column or upload data.")
    
    # تحويل البيانات من JSON string مرة أخرى إلى DataFrame
    df = pd.read_json(stored_data, orient='split')
    
    return plot_distribution_of_clas_in_data(df, selected_col)

# Callback لتحديث الرسم البياني ثلاثي الأبعاد
@app.callback(
    Output('3D-chart', 'figure'),
    Input('x-axis-dropdown1', 'value'),
    Input('y-axis-dropdown1', 'value'),
    Input('z-axis-dropdown1', 'value'),
    Input('color-dropdown', 'value'),
    Input('data-store', 'data'),
    prevent_initial_call=True
)
def update_3d_plot(x_col, y_col, z_col, color_col, stored_data):
    if not stored_data or not x_col or not y_col or not z_col:
        return go.Figure().update_layout(title="Please select X, Y, and Z axes for the 3D plot and upload data.")

    df = pd.read_json(stored_data, orient='split')

    fig = go.Figure()
    
    # التأكد من وجود الأعمدة في الـ DataFrame
    if not all(col in df.columns for col in [x_col, y_col, z_col]):
        return go.Figure().update_layout(title="One or more selected columns not found in data for 3D plot.")

    marker_config = {'size': 5}
    if color_col and color_col in df.columns:
        # التأكد أن عمود اللون رقمي إذا كان سيُستخدم لمقياس لوني مستمر
        if pd.api.types.is_numeric_dtype(df[color_col]):
            marker_config['color'] = df[color_col]
            marker_config['colorscale'] = 'Viridis' # مثال لمقياس لوني
            marker_config['showscale'] = True
        else: # إذا كان عمود اللون فئويًا، يمكن استخدامه لتلوين الفئات المختلفة بألوان مميزة
            # هذا يتطلب منطقًا أكثر تعقيدًا لتعيين الألوان، حاليًا سنستخدمه كما هو وسيعامله Plotly
            marker_config['color'] = df[color_col].astype('category').cat.codes # تحويل الفئات إلى أرقام
            # يمكنك إضافة colorscale مخصص هنا إذا أردت
            print(f"Warning: Categorical color column '{color_col}' used. Colors might not be distinct without custom mapping.")


    fig.add_trace(go.Scatter3d(
        x=df[x_col],
        y=df[y_col],
        z=df[z_col],
        mode='markers',
        marker=marker_config
    ))

    fig.update_layout(
        title=f"3D Scatter Plot: {x_col} vs {y_col} vs {z_col}",
        scene=dict(
            xaxis_title=x_col,
            yaxis_title=y_col,
            zaxis_title=z_col
        ),
        margin=dict(l=0, r=0, b=0, t=40) # تعديل الهوامش
    )
    return fig

##########################################################model tsest
# --- Callbacks ---

# Callback to parse uploaded data CSV
def parse_contents(contents, filename):
    content_type, content_string = contents.split(',')
    decoded = base64.b64decode(content_string)
    try:
        if 'csv' in filename:
            # Assume Csv file
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in filename or 'xlsx' in filename:
            # Assume an Excel file
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            return None, html.Div(['Invalid file type. Please upload CSV or Excel.'])
        return df, html.Div([f'{filename} uploaded successfully.'])
    except Exception as e:
        print(e)
        return None, html.Div(['There was an error processing this file.'])

@app.callback(
    [Output('data-store1', 'data'),
     Output('output-data-upload-status', 'children'),
     Output('column-dropdown', 'options'),
     Output('column-dropdown', 'value')],
    [Input('upload-data', 'contents')],
    [State('upload-data', 'filename')],
    allow_duplicate=True  
)
def update_data_output(contents, filename):
    if contents is not None:
        df, status_message = parse_contents(contents, filename)
        if df is not None:
            if 'actual' not in df.columns:
                return (None,
                        html.Div(['Error: CSV file must contain an "actual" column.']),
                        [], None)

            # Get categorical and numerical columns (excluding 'actual') for dropdown
            # You might want to refine this logic based on your specific needs
            potential_cols = [col for col in df.columns if col != 'actual'] # and df[col].dtype == 'object' or df[col].nunique() < 20]
            options = [{'label': col, 'value': col} for col in potential_cols]
            default_value = options[0]['value'] if options else None
            return df.to_json(date_format='iso', orient='split'), status_message, options, default_value
    return None, "Upload a data file (CSV).", [], None


# Callback to load the uploaded model
@app.callback(
    [Output('model-status-store', 'data'),
     Output('output-model-upload-status', 'children')],
    [Input('upload-model', 'contents')],
    [State('upload-model', 'filename')]
)
def update_model_output(contents, filename):
    global loaded_model
    if contents is not None:
        content_type, content_string = contents.split(',')
        decoded = base64.b64decode(content_string)
        try:
            # Assuming the model is saved with joblib or pickle
            # For joblib, it's better to use BytesIO
            model_file = io.BytesIO(decoded)
            loaded_model = joblib.load(model_file) # Or pickle.load(model_file)
            status_message = html.Div([f'{filename} model loaded successfully.'])
            return {'status': 'loaded', 'filename': filename}, status_message
        except Exception as e:
            print(e)
            loaded_model = None
            return {'status': 'error'}, html.Div(['There was an error loading the model. Ensure it is a .pkl or .joblib file.'])
    loaded_model = None # Reset if no content
    return {'status': 'empty'}, "Upload a model file (.pkl, .joblib)."


# Callback to process data with model and calculate metrics
@app.callback(
    [Output('processed-data-store', 'data'),
     Output('metrics-store', 'data'),
     Output('analysis-section', 'style')], # Show analysis section
    [Input('data-store1', 'data'),
     Input('model-status-store', 'data')]
)
def process_data_with_model(jsonified_data, model_status):
    global loaded_model
    ctx = dash.callback_context
    if not ctx.triggered or jsonified_data is None or model_status is None or model_status.get('status') != 'loaded' or loaded_model is None:
        return None, None, {'display': 'none'} # Keep section hidden

    df = pd.read_json(jsonified_data, orient='split')

    # Prepare features for prediction (assuming all columns except 'actual' are features)
    # This might need adjustment based on how your model was trained.
    if 'actual' not in df.columns:
        print("Error: 'actual' column not found in data for prediction.")
        return None, None, {'display': 'none'}

    feature_columns = [col for col in df.columns if col != 'actual']
    if not feature_columns:
        print("Error: No feature columns found in data for prediction.")
        return None, None, {'display': 'none'}

    X_test = df[feature_columns]
    y_actual = df['actual']

    try:
        predictions = loaded_model.predict(X_test)
        df['predicted'] = predictions
        df['is_correct'] = (df['actual'] == df['predicted'])

        # Calculate overall metrics
        accuracy = accuracy_score(y_actual, predictions)
        f1 = f1_score(y_actual, predictions, average='weighted') # use 'binary' or 'micro' if appropriate
        num_correct = int(df['is_correct'].sum())
        num_incorrect = len(df) - num_correct

        metrics = {
            'accuracy': accuracy,
            'f1_score': f1,
            'num_correct': num_correct,
            'num_incorrect': num_incorrect,
            'total_samples': len(df)
        }
        return df.to_json(date_format='iso', orient='split'), metrics, {'display': 'block'} # Show section
    except Exception as e:
        print(f"Error during prediction or metrics calculation: {e}")
        return None, None, {'display': 'none'}


# Callback for Plot 1: Bar Chart
@app.callback(
    Output('plot1-bar-chart', 'figure'),
    [Input('processed-data-store', 'data'),
     Input('column-dropdown', 'value')]
)
def update_plot1(jsonified_processed_data, selected_column):
    if jsonified_processed_data is None or selected_column is None:
        return go.Figure(layout={"title": "Plot 1: Upload data and model, then select a column."})

    df = pd.read_json(jsonified_processed_data, orient='split')
    if selected_column not in df.columns:
        return go.Figure(layout={"title": f"Plot 1: Column '{selected_column}' not found."})

    # Calculate counts of correct and incorrect predictions for each unique value in the selected column
    grouped_data = df.groupby([selected_column, 'is_correct']).size().reset_index(name='count')

    # Pivot the data for a stacked bar chart
    pivot_df = grouped_data.pivot(index=selected_column, columns='is_correct', values='count').fillna(0)
    # Ensure columns for True (Correct) and False (Incorrect) exist
    if True not in pivot_df.columns: pivot_df[True] = 0
    if False not in pivot_df.columns: pivot_df[False] = 0

    pivot_df = pivot_df.rename(columns={True: 'Correct', False: 'Incorrect'})

    fig = go.Figure()
    if 'Correct' in pivot_df.columns:
        fig.add_trace(go.Bar(
            x=pivot_df.index.astype(str), # Ensure x-axis is treated as categorical
            y=pivot_df['Correct'],
            name='Correct',
            marker_color='green'
        ))
    if 'Incorrect' in pivot_df.columns:
        fig.add_trace(go.Bar(
            x=pivot_df.index.astype(str),
            y=pivot_df['Incorrect'],
            name='Incorrect',
            marker_color='red'
        ))

    fig.update_layout(
        barmode='stack',
        title_text=f"Prediction Correctness for '{selected_column}'",
        xaxis_title=selected_column,
        yaxis_title="Number of Predictions",
        xaxis={'type': 'category'} # Explicitly set x-axis type to category
    )
    return fig

# Callback for Plot 2: Pie Chart
@app.callback(
    Output('plot2-pie-chart', 'figure'),
    [Input('processed-data-store', 'data'),
     Input('column-dropdown', 'value')]
)
def update_plot2(jsonified_processed_data, selected_column):
    if jsonified_processed_data is None or selected_column is None:
        return go.Figure(layout={"title": "Plot 2: Upload data and model, then select a column."})

    df = pd.read_json(jsonified_processed_data, orient='split')
    if selected_column not in df.columns:
        return go.Figure(layout={"title": f"Plot 2: Column '{selected_column}' not found."})


    errors_df = df[~df['is_correct']] # ~df['is_correct'] means df['is_correct'] == False

    if errors_df.empty:
        return go.Figure(data=[go.Pie(labels=['No Errors Found'], values=[1], hole=.3)],
                         layout=go.Layout(title_text=f"No Prediction Errors for '{selected_column}'"))

    error_counts = errors_df[selected_column].value_counts().reset_index()
    error_counts.columns = [selected_column, 'error_count']

    fig = px.pie(
        error_counts,
        names=selected_column,
        values='error_count',
        title=f"Error Distribution by Category in '{selected_column}'",
        hole=.3 # for a donut chart effect
    )
    fig.update_traces(textposition='inside', textinfo='percent+label')
    return fig

# Callback for Plot 3: Indicators
@app.callback(
    Output('plot3-indicators', 'figure'),
    [Input('metrics-store', 'data')]
)
def update_plot3(metrics):
    if metrics is None:
        return go.Figure(layout={"title": "Plot 3: Performance indicators will appear after processing data with model."})

    fig = go.Figure()

    fig.add_trace(go.Indicator(
        mode="gauge+number",
        value=metrics.get('accuracy', 0) * 100,
        title={'text': "Model Accuracy (%)"},
        domain={'x': [0, 0.45], 'y': [0.55, 1]},
        gauge={'axis': {'range': [None, 100]},
               'bar': {'color': "royalblue"},
               'steps': [
                   {'range': [0, 60], 'color': "lightgray"},
                   {'range': [60, 85], 'color': "gray"}],
               'threshold': {'line': {'color': "red", 'width': 4}, 'thickness': 0.75, 'value': 90}}
    ))

    fig.add_trace(go.Indicator(
        mode="number",
        value=metrics.get('num_correct', 0),
        title={"text": "Number of Correct Predictions"},
        domain={'x': [0.55, 1], 'y': [0.75, 1]} # Adjusted y for better spacing
    ))

    fig.add_trace(go.Indicator(
        mode="number",
        value=metrics.get('num_incorrect', 0),
        title={"text": "Number of Incorrect Predictions"},
        domain={'x': [0.55, 1], 'y': [0.5, 0.70]} # Adjusted y for better spacing
    ))

    fig.add_trace(go.Indicator(
        mode="gauge+number",
        value=metrics.get('f1_score', 0),
        title={'text': "F1 Score (Weighted)"},
        domain={'x': [0, 0.45], 'y': [0, 0.45]},
        gauge={'axis': {'range': [None, 1]},
               'bar': {'color': "green"}}
    ))

    fig.update_layout(
        grid={'rows': 2, 'columns': 2, 'pattern': "independent"},
        height=500 
    )

    return fig
@app.callback(
    Output("download-data", "data"),
    Input("download-new-data-button", "n_clicks"),
    prevent_initial_call=True
)
def download_new_data(n_clicks):
    return dcc.send_data_frame(global_df.to_csv, "modified_data.csv", index=False)

app.run(debug=True, jupyter_mode="external", port=8052)


Dash app running on http://127.0.0.1:8052/



Passing literal json to 'read_json' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Passing literal json to 'read_json' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Passing literal json to 'read_json' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Passing literal json to 'read_json' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Passing literal json to 'read_json' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Passing literal json to 'read_json' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Passing literal json to 'read_json' is deprecated and will be r

In [11]:
global_df