In [1]:
import pandas as pd
# Load your data
data = pd.read_csv('preprocess1.csv')
data.head(3)

Unnamed: 0,Age,Gender,Race,ChronicPainConditions,NumOpioidPrescriptions,AverageDosage,DurationOfPrescriptions,NumHealthcareVisits,NumHospitalizations,PainManagementTreatment,MedicationName,Frequency,Duration,Refills,MedicationClass,Adherence,AppointmentType,Target,Dosage_numeric
0,54,0,3,3,6,80,7,18,0,0,3,0.0,1,0,2,1.0,2,0,10.0
1,21,0,0,1,11,70,25,12,0,1,4,0.0,20,0,1,0.0,0,1,100.0
2,67,0,4,3,5,15,7,10,2,0,3,1.0,7,2,0,1.0,2,0,20.0


In [2]:
data.shape

(1000, 19)

In [3]:
data.isnull().sum()

Age                        0
Gender                     0
Race                       0
ChronicPainConditions      0
NumOpioidPrescriptions     0
AverageDosage              0
DurationOfPrescriptions    0
NumHealthcareVisits        0
NumHospitalizations        0
PainManagementTreatment    0
MedicationName             0
Frequency                  0
Duration                   0
Refills                    0
MedicationClass            0
Adherence                  0
AppointmentType            0
Target                     0
Dosage_numeric             0
dtype: int64

In [4]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [6]:
features = [
    'Age', 'Gender', 'Race', 'ChronicPainConditions', 'NumOpioidPrescriptions', 
    'AverageDosage', 'DurationOfPrescriptions', 'NumHealthcareVisits', 
    'NumHospitalizations', 'PainManagementTreatment', 'MedicationName', 
    'Frequency', 'Duration', 'Refills', 'MedicationClass', 'Adherence', 
    'AppointmentType', 'Dosage_numeric'
]
target = 'Target'

# Split data into features (X) and target (y)
X = data[features]
y = data[target]

In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
import warnings 
# Settings the warnings to be ignored 
warnings.filterwarnings('ignore') 

# Define hyperparameter grid for GridSearchCV
param_grid = {
    'solver': ['lbfgs', 'liblinear', 'sag'],  # Experiment with different solvers
    'C': [0.001, 0.01, 0.1, 1, 10, 100],  # Regularization parameter (inverse of strength)
    'penalty': ['l1', 'l2']  # Regularization type
}

# Create a logistic regression classifier
logistic_regression = LogisticRegression()

# Perform hyperparameter tuning using GridSearchCV
grid_search = GridSearchCV(logistic_regression, param_grid, cv=5, scoring='accuracy')  # Use cross-validation
grid_search.fit(X_train, y_train)

In [11]:
# Print the best hyperparameters and accuracy on the training set
print("Best Hyperparameters:", grid_search.best_params_)
print("Training Accuracy:", grid_search.best_score_)

# Predict on the test set and calculate accuracy
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

Best Hyperparameters: {'C': 10, 'penalty': 'l1', 'solver': 'liblinear'}
Training Accuracy: 1.0
Test Accuracy: 1.0


In [13]:
from sklearn.svm import SVC
# Define hyperparameter grid for GridSearchCV (experiment with different values)
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],  # Regularization parameter (inverse of strength)
    'kernel': ['linear', 'rbf', 'poly'],  # Kernel function
    'gamma': [0.001, 0.01, 0.1, 1],  # Kernel coefficient (for non-linear kernels)
}

# Create a Support Vector Machine classifier
svm_classifier = SVC()

# Perform hyperparameter tuning using GridSearchCV
grid_search = GridSearchCV(svm_classifier, param_grid, cv=5, scoring='accuracy')  # Use cross-validation
grid_search.fit(X_train, y_train)

# Print the best hyperparameters and accuracy on the training set
print("Best Hyperparameters:", grid_search.best_params_)
print("Training Accuracy:", grid_search.best_score_)

# Predict on the test set and calculate accuracy
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

Best Hyperparameters: {'C': 0.1, 'gamma': 0.001, 'kernel': 'linear'}
Training Accuracy: 1.0
Test Accuracy: 1.0


In [14]:
from sklearn.neighbors import KNeighborsClassifier
# Define hyperparameter grid for GridSearchCV (experiment with different values)
param_grid = {
    'n_neighbors': range(1, 21),  # Number of neighbors to consider
    'weights': ['uniform', 'distance'],  # Weighting scheme for distance calculation
    'algorithm': ['auto', 'ball_tree', 'kd_tree']  # Algorithm for neighbor search
}

# Create a K-Nearest Neighbors classifier
knn_classifier = KNeighborsClassifier()

# Perform hyperparameter tuning using GridSearchCV
grid_search = GridSearchCV(knn_classifier, param_grid, cv=5, scoring='accuracy')  # Use cross-validation
grid_search.fit(X_train, y_train)

# Print the best hyperparameters and accuracy on the training set
print("Best Hyperparameters:", grid_search.best_params_)
print("Training Accuracy:", grid_search.best_score_)

# Predict on the test set and calculate accuracy
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

Best Hyperparameters: {'algorithm': 'ball_tree', 'n_neighbors': 10, 'weights': 'distance'}
Training Accuracy: 0.7837500000000001
Test Accuracy: 0.75


In [15]:
from sklearn.tree import DecisionTreeClassifier
# Define hyperparameter grid
param_grid = {
    'max_depth': range(2, 11),  # Maximum depth of the tree
    'min_samples_split': range(2, 21),  # Minimum samples to split a node
    'min_samples_leaf': range(1, 21)  # Minimum samples required at each leaf node
}

# Create and tune the model
dt_clf = DecisionTreeClassifier()
grid_search = GridSearchCV(dt_clf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Print results
print("Best Hyperparameters (Decision Tree):", grid_search.best_params_)
print("Training Accuracy (Decision Tree):", grid_search.best_score_)

# Predict and evaluate
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy (Decision Tree):", accuracy)

Best Hyperparameters (Decision Tree): {'max_depth': 2, 'min_samples_leaf': 1, 'min_samples_split': 2}
Training Accuracy (Decision Tree): 1.0
Test Accuracy (Decision Tree): 1.0


In [16]:
from sklearn.ensemble import RandomForestClassifier
# Define hyperparameter grid
param_grid = {
    'n_estimators': range(10, 101, 10),  # Number of trees in the forest
    'max_depth': range(2, 11),  # Maximum depth of individual trees
    'min_samples_split': range(2, 21),  # Minimum samples to split a node
    'min_samples_leaf': range(1, 21)  # Minimum samples required at each leaf node
}

# Create and tune the model
rf_clf = RandomForestClassifier()
grid_search = GridSearchCV(rf_clf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Print results
print("Best Hyperparameters (Random Forest):", grid_search.best_params_)
print("Training Accuracy (Random Forest):", grid_search.best_score_)

# Predict and evaluate
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy (Random Forest):", accuracy)

KeyboardInterrupt: 

In [17]:
from sklearn.ensemble import GradientBoostingClassifier
# Define hyperparameter grid
param_grid = {
    'learning_rate': [0.01, 0.1, 1],  # Learning rate for boosting
    'n_estimators': range(10, 101, 10),  # Number of boosting stages
    'max_depth': range(2, 6),  # Maximum depth of individual trees
    'min_samples_split': range(2, 21),  # Minimum samples to split a node
    'min_samples_leaf': range(1, 21)  # Minimum samples required at each leaf node
}

# Create and tune the model
gbm_clf = GradientBoostingClassifier()
grid_search = GridSearchCV(gbm_clf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Print results
print("Best Hyperparameters (GBM):", grid_search.best_params_)
print("Training Accuracy (GBM):", grid_search.best_score_)

# Predict and evaluate
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy (GBM):", accuracy)

KeyboardInterrupt: 

In [None]:
Other Algorithm we have XGBoost
- LightGBM
- CatBoost
- Naive Bayes
- Neural Networks (Multilayer Perceptron, MLP)
- AdaBoost
- Bagging Classifier