# Install Dependencies

pip install pandas scikit-learn


# Code for Breast Cancer Classification (Logistic Regression)

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
import pickle

# 1. Load and prepare the dataset
def load_data():
    """Return the scikit-learn Breast Cancer dataset as one labelled DataFrame.

    The feature columns are named after the dataset's ``feature_names`` and
    the class labels are stored in an extra ``target`` column.
    """
    bunch = load_breast_cancer()
    frame = pd.DataFrame(data=bunch.data, columns=bunch.feature_names)
    # Keep the labels next to the features so the table can be passed around as one object
    frame['target'] = bunch.target
    return frame

# 2. Preprocess the data
def preprocess_data(df, test_size=0.2, random_state=42):
    """Split a labelled DataFrame into standardized train and test sets.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing a ``target`` column; every other column is treated
        as a feature.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``; the default reproduces the
        original 80/20 split.
    random_state : int or None, default 42
        Forwarded to ``train_test_split`` so the split is reproducible.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, scaler)`` where the two feature
        sets have been standardized and ``scaler`` is the fitted
        ``StandardScaler`` that must be reused on any future input.
    """
    # Separate the features from the label column
    X = df.drop(columns='target')
    y = df['target']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training split only, then apply the same
    # statistics to the test split, so no test-set information leaks in.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test, scaler

# 3. Train a machine learning model
def train_model(X_train, y_train):
    """Fit a logistic-regression classifier on the training data and return it.

    The classifier is created with ``max_iter=10000``.
    """
    # ``fit`` returns the estimator itself, so construction and training can be chained
    return LogisticRegression(max_iter=10000).fit(X_train, y_train)

# 4. Evaluate the model
def evaluate_model(model, X_test, y_test):
    """Print the model's accuracy on the test set and return it.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``
    X_test : test features, preprocessed the same way as the training data
    y_test : true labels for ``X_test``

    Returns
    -------
    The value produced by ``accuracy_score`` (the printed percentage is this
    value multiplied by 100). Earlier versions returned ``None``; returning
    the score lets callers log or compare it without re-computing.
    """
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Model accuracy: {accuracy * 100:.2f}%')
    return accuracy

# 5. Save the model and scaler
def save_model(model, scaler, model_path='breast_cancer_model.pkl', scaler_path='scaler.pkl'):
    """Pickle the trained model and its scaler to disk.

    Parameters
    ----------
    model : object
        Trained model to serialize.
    scaler : object
        Fitted scaler to serialize; it is stored separately from the model.
    model_path : str, default 'breast_cancer_model.pkl'
        Destination file for ``model``.
    scaler_path : str, default 'scaler.pkl'
        Destination file for ``scaler``.

    Existing files at either path are overwritten.
    """
    # Write the model first, then the scaler; each file is closed before the next is opened
    for obj, path in ((model, model_path), (scaler, scaler_path)):
        with open(path, 'wb') as handle:
            pickle.dump(obj, handle)

# 6. Load the saved model and scaler for prediction
def load_saved_model(model_path='breast_cancer_model.pkl', scaler_path='scaler.pkl'):
    """Load a pickled model and scaler from disk.

    Parameters
    ----------
    model_path : str, default 'breast_cancer_model.pkl'
        File containing the pickled model.
    scaler_path : str, default 'scaler.pkl'
        File containing the pickled scaler.

    Returns
    -------
    tuple
        ``(model, scaler)`` in that order.

    Raises
    ------
    FileNotFoundError
        If either file does not exist.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load files that were produced by a trusted source.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)
    with open(scaler_path, 'rb') as scaler_file:
        scaler = pickle.load(scaler_file)
    return model, scaler

# 7. Make predictions
def predict(model, scaler, features):
    """Classify a single sample as 'Malignant' or 'Benign'.

    Parameters
    ----------
    model : object exposing ``predict``
        Trained classifier.
    scaler : object exposing ``transform``
        The scaler that was fitted on the training data.
    features : sequence
        Raw (unscaled) feature values for ONE sample.

    Returns
    -------
    str
        'Malignant' when the predicted label is 0, otherwise 'Benign'.
    """
    # Wrap the sample in a list so the scaler receives a one-row batch
    scaled = scaler.transform([features])
    # Take the single predicted label explicitly instead of comparing the
    # whole prediction container to 0: that comparison only worked through
    # one-element ndarray truthiness and mislabelled list-like results.
    label = model.predict(scaled)[0]
    return 'Malignant' if label == 0 else 'Benign'

# Script entry point: runs the full training pipeline (load -> preprocess ->
# train -> evaluate -> save) when this code is executed directly.
if __name__ == "__main__":
    # Load the dataset as a DataFrame with a 'target' column
    df = load_data()

    # Split into train/test sets and standardize; keep the fitted scaler for later reuse
    X_train, X_test, y_train, y_test, scaler = preprocess_data(df)

    # Fit the classifier on the standardized training data
    model = train_model(X_train, y_train)

    # Print the accuracy on the held-out test set
    evaluate_model(model, X_test, y_test)

    # Persist both the model and the scaler: predictions need the same scaling as training
    save_model(model, scaler)

    print("Model training complete and saved.")


Model accuracy: 97.37%
Model training complete and saved.


# Code for Data Preprocessing (using Breast Cancer Wisconsin Dataset)

In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

# Fetch the Breast Cancer Wisconsin data and assemble one labelled table
data = load_breast_cancer()
df = pd.DataFrame(data=data.data, columns=data.feature_names)
# Label column (Malignant=0, Benign=1)
df['target'] = data.target

# Separate the feature matrix from the label column
X, y = df.drop(columns='target'), df['target']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Learn the scaling statistics from the training rows only,
# then apply the same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Preview the first 5 standardized training rows
print(X_train[:5])


[[-1.44075296 -0.43531947 -1.36208497 -1.1391179   0.78057331  0.71892128
   2.82313451 -0.11914956  1.09266219  2.45817261 -0.26380039 -0.01605246
  -0.47041357 -0.47476088  0.83836493  3.25102691  8.43893667  3.39198733
   2.62116574  2.06120787 -1.23286131 -0.47630949 -1.24792009 -0.97396758
   0.72289445  1.18673232  4.67282796  0.9320124   2.09724217  1.88645014]
 [ 1.97409619  1.73302577  2.09167167  1.85197292  1.319843    3.42627493
   2.01311199  2.66503199  2.1270036   1.55839569  0.80531919 -0.81268678
   0.75195659  0.87716951 -0.89605315  1.18122247  0.18362761  0.60059598
  -0.31771686  0.52963649  2.17331385  1.3112795   2.08161691  2.1374055
   0.76192793  3.26560084  1.92862053  2.6989469   1.89116053  2.49783848]
 [-1.39998202 -1.24962228 -1.34520926 -1.10978518 -1.33264483 -0.30735463
  -0.36555756 -0.69650228  1.93033305  0.95437877  0.02752055  1.96305996
  -0.12095781 -0.35077918  0.57276579  0.7394992   0.32065553  0.58946222
   2.61504052  0.71892779 -1.29528358

In [5]:
import pandas as pd

# Classification-report figures, hard-coded (transcribed from an image rather
# than computed in this notebook). One tuple per report row; "" marks the
# cells that are blank on the "accuracy" row.
_COLUMNS = ("Class", "precision", "recall", "f1-score", "support")
_ROWS = (
    (0, 0.97, 0.97, 0.97, 42),
    (1, 0.97, 0.97, 0.97, 68),
    ("accuracy", "", "", 0.97, 110),
    ("macro avg", 0.97, 0.97, 0.97, 110),
    ("weighted avg", 0.97, 0.97, 0.97, 110),
)

# Pivot the rows into the column -> values mapping used to build the table
data = {name: list(values) for name, values in zip(_COLUMNS, zip(*_ROWS))}

# Build the table
df = pd.DataFrame(data)

# Show the table as plain text
print(df)


          Class precision recall  f1-score  support
0             0      0.97   0.97      0.97       42
1             1      0.97   0.97      0.97       68
2      accuracy                       0.97      110
3     macro avg      0.97   0.97      0.97      110
4  weighted avg      0.97   0.97      0.97      110
