In [1]:
%%writefile app.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR
from sklearn.metrics import accuracy_score, r2_score
import joblib
import streamlit as st

def detect_task_type(y):
    """Guess whether a target column describes a classification or regression task.

    Parameters
    ----------
    y : pandas.Series
        The target column.

    Returns
    -------
    str
        ``'classification'`` or ``'regression'``.

    Rules, applied in order:
    1. Non-numeric targets (object, string, category, datetime, ...) and
       boolean targets are always classification, whatever their cardinality.
    2. Float targets holding at least one fractional value are regression,
       even when the column has few distinct values; classifiers cannot be
       trained on continuous labels.
    3. Remaining numeric targets (integers, or floats that only hold whole
       numbers) are classification when they have fewer than 20 distinct
       values, otherwise regression.
    """
    if pd.api.types.is_bool_dtype(y) or not pd.api.types.is_numeric_dtype(y):
        return 'classification'

    if pd.api.types.is_float_dtype(y):
        observed = y.dropna()
        # `x % 1` is 0 for whole numbers; anything else (including NaN from inf)
        # marks the column as continuous.
        if bool(((observed % 1) != 0).any()):
            return 'regression'

    return 'classification' if y.nunique() < 20 else 'regression'

_RANDOM_STATE = 42  # one seed for the split and every stochastic model, so rankings are reproducible


def _build_preprocessor(X):
    """Return an unfitted ColumnTransformer for the feature frame ``X``.

    Numeric (any integer/float width) and boolean columns are median-imputed
    and standardised; object/category columns are imputed with the most
    frequent value and one-hot encoded. Columns of any other dtype are dropped
    by the ColumnTransformer's default ``remainder='drop'``.
    """
    num_features = X.select_dtypes(include=['number', 'bool']).columns.tolist()
    cat_features = X.select_dtypes(include=['object', 'category']).columns.tolist()

    num_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ])

    cat_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        # Categories unseen during fit are encoded as all-zeros instead of raising.
        ('encoder', OneHotEncoder(handle_unknown='ignore'))
    ])

    return ColumnTransformer([
        ('num', num_pipeline, num_features),
        ('cat', cat_pipeline, cat_features)
    ])


def _candidate_models(task_type):
    """Return a ``{name: unfitted estimator}`` mapping of models to compare."""
    if task_type == 'classification':
        return {
            'LogisticRegression': LogisticRegression(max_iter=1000),
            'DecisionTreeClassifier': DecisionTreeClassifier(random_state=_RANDOM_STATE),
            'RandomForestClassifier': RandomForestClassifier(random_state=_RANDOM_STATE),
            'GradientBoostingClassifier': GradientBoostingClassifier(random_state=_RANDOM_STATE),
            'KNeighborsClassifier': KNeighborsClassifier(),
            'SVC': SVC()
        }
    return {
        'LinearRegression': LinearRegression(),
        'DecisionTreeRegressor': DecisionTreeRegressor(random_state=_RANDOM_STATE),
        'RandomForestRegressor': RandomForestRegressor(random_state=_RANDOM_STATE),
        'GradientBoostingRegressor': GradientBoostingRegressor(random_state=_RANDOM_STATE),
        'KNeighborsRegressor': KNeighborsRegressor(),
        'SVR': SVR()
    }


def preprocess_and_train(df, target_column):
    """Train several models on ``df`` and keep the one that scores best.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataset containing the features and the target column.
    target_column : str
        Name of the column to predict.

    Returns
    -------
    tuple
        ``(best_model_name, results, predictions, task_type)`` where
        ``results`` maps each model name to its hold-out score as a percentage
        (accuracy for classification, R^2 for regression), ``predictions`` are
        the best pipeline's predictions for every row of ``df`` (including
        rows whose target is missing), and ``task_type`` is
        ``'classification'`` or ``'regression'``.

    Raises
    ------
    ValueError
        If no candidate produced a comparable (non-NaN) score.

    Side effects
    ------------
    Writes the best fitted pipeline to ``best_model.pkl`` in the working
    directory.
    """
    # Local import keeps this block self-contained; sklearn is already a dependency of the file.
    from sklearn.base import clone

    X = df.drop(columns=[target_column])
    y = df[target_column]

    task_type = detect_task_type(y)

    # Estimators cannot be fitted on missing targets: train and evaluate on
    # labelled rows only, but still predict for every row at the end.
    labelled = y.notna()
    X_labelled, y_labelled = X[labelled], y[labelled]

    preprocessor = _build_preprocessor(X)
    models = _candidate_models(task_type)

    X_train, X_test, y_train, y_test = train_test_split(
        X_labelled, y_labelled, test_size=0.2, random_state=_RANDOM_STATE
    )

    results = {}
    best_score = -np.inf
    best_model_name = ''
    best_model = None

    for name, model in models.items():
        # Each pipeline gets its own preprocessor copy so that fitting a later
        # candidate never re-fits a step of the pipeline already kept as best.
        pipe = Pipeline([
            ('preprocessor', clone(preprocessor)),
            ('model', model)
        ])

        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)

        score = accuracy_score(y_test, y_pred) if task_type == 'classification' else r2_score(y_test, y_pred)
        results[name] = round(score * 100, 2)  # Convert to percentage

        # A NaN score compares False here and therefore never becomes the best.
        if score > best_score:
            best_score = score
            best_model_name = name
            best_model = pipe

    if best_model is None:
        raise ValueError(
            "No model produced a valid score; the dataset may be too small to evaluate."
        )

    joblib.dump(best_model, 'best_model.pkl')
    return best_model_name, results, best_model.predict(X), task_type


# Streamlit UI
# Streamlit re-executes this whole script on every widget interaction, so
# anything that must survive a rerun (the training results) is kept in
# st.session_state rather than in local variables.
st.title("📊 ML Genie")

st.sidebar.markdown("## 🚀 Quick Start")
st.sidebar.markdown("""
1. Upload your CSV dataset
2. Select target column
3. Run analysis
4. Download predictions
""")

st.sidebar.markdown("## 📋 Supported Formats")
st.sidebar.markdown("""
- CSV files only
- Numerical and categorical features
- Classification and regression tasks
""")

uploaded_file = st.file_uploader("📂 Upload your dataset (.csv)", type=["csv"])

if uploaded_file is not None:
    # Case-insensitive so that e.g. "DATA.CSV" is accepted.
    if not uploaded_file.name.lower().endswith('.csv'):
        st.error("❌ The format is not supported. Please upload a CSV file.")
        st.stop()

    # Only the parsing step is reported as a file-reading error.
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"🚫 Error reading file: {e}")
        st.stop()

    st.write("### 🔍 Dataset Preview", df.head())

    target_column = st.selectbox("🎯 Select the target column:", df.columns)

    if st.button("🚀 Run Analysis"):
        try:
            with st.spinner("Training models, please wait..."):
                best_model_name, results, predictions, task_type = preprocess_and_train(df, target_column)
        except Exception as e:
            # Drop any stale result so a failed run never shows old numbers.
            if 'analysis' in st.session_state:
                del st.session_state['analysis']
            st.error(f"🚫 Error while training models: {e}")
        else:
            st.session_state['analysis'] = {
                'file_name': uploaded_file.name,
                'target_column': target_column,
                'best_model_name': best_model_name,
                'results': results,
                'predictions': predictions,
                'task_type': task_type,
            }

    # Render from session state, outside the button branch: st.button is only
    # True during the rerun caused by its own click, so results drawn inside
    # that branch would vanish when the download button triggers a rerun.
    analysis = st.session_state.get('analysis')
    analysis_is_current = (
        analysis is not None
        and analysis['file_name'] == uploaded_file.name
        and analysis['target_column'] == target_column
        and len(analysis['predictions']) == len(df)
    )

    if analysis_is_current:
        st.success(f"✅ Best Model: {analysis['best_model_name']} ({analysis['task_type'].title()})")
        st.write("### 📈 Model Performance (%):")
        st.dataframe(pd.DataFrame.from_dict(analysis['results'], orient='index', columns=['Score']).sort_values(by='Score', ascending=False))

        # Build the export on a copy so the uploaded frame is left untouched.
        export_df = df.assign(Predictions=analysis['predictions'])
        st.download_button("📥 Download Predictions CSV", data=export_df.to_csv(index=False), file_name="predictions.csv")
else:
    st.info("📂 Please upload a CSV file to continue.")


Writing app.py


In [2]:
!pip install streamlit
!pip install pyngrok

Collecting streamlit
  Downloading streamlit-1.47.0-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.47.0-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m68.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [3]:
import os
from getpass import getpass

from pyngrok import ngrok

# Never store the ngrok authtoken in the notebook: read it from the
# NGROK_AUTHTOKEN environment variable, or prompt for it without echoing.
# NOTE: the token that used to be hardcoded in this cell must be considered
# leaked and should be revoked in the ngrok dashboard.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
if not ngrok_authtoken:
    raise RuntimeError("An ngrok authtoken is required to open a tunnel.")
ngrok.set_auth_token(ngrok_authtoken)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [4]:
import os
import threading


def run_streamlit():
    """Run the Streamlit app on port 8501; blocks until the server command exits."""
    os.system('streamlit run app.py --server.port 8501')


# os.system blocks, so the server is launched from a background thread to keep
# the notebook usable. Re-running this cell must not start a second server on
# the same port, hence the check for a launcher thread that is still alive.
if isinstance(globals().get('thread'), threading.Thread) and thread.is_alive():
    print('Streamlit is already running on port 8501.')
else:
    thread = threading.Thread(target=run_streamlit)
    thread.start()

In [5]:
from pyngrok import ngrok

# Start from a clean slate before a new tunnel is opened in a later cell.
ngrok.kill()  # per the pyngrok docs this terminates the running ngrok process, which also closes its tunnels

In [6]:
import socket
import time

from pyngrok import ngrok

STREAMLIT_PORT = 8501
STARTUP_TIMEOUT_S = 30


def wait_for_port(port, host="localhost", timeout=STARTUP_TIMEOUT_S, interval=0.5):
    """Poll ``host:port`` until it accepts a TCP connection.

    Returns True as soon as a connection succeeds, or False if ``timeout``
    seconds pass without one.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=interval):
                return True
        except OSError:
            time.sleep(interval)
    return False


# A fixed sleep either wastes time or is too short on a slow machine; wait for
# the server to actually listen before exposing it.
if not wait_for_port(STREAMLIT_PORT):
    raise RuntimeError(
        f"Streamlit did not start listening on port {STREAMLIT_PORT} "
        f"within {STARTUP_TIMEOUT_S} seconds."
    )

public_url=ngrok.connect(STREAMLIT_PORT)
print('your streamlit app is live here: ',public_url)

your streamlit app is live here:  NgrokTunnel: "https://682da3bbb501.ngrok-free.app" -> "http://localhost:8501"
