In [162]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [164]:
# Read the raw dataset from disk into a DataFrame
file_path = "data.csv"  # Update the path to your dataset
data = pd.read_csv(filepath_or_buffer=file_path)

In [166]:
# Data preprocessing
# Selecting relevant columns. `.copy()` makes `data` an independent frame, so
# later in-place column assignments on it do not trigger pandas'
# SettingWithCopyWarning (raised when writing to a frame derived from another).
# NOTE(review): 'pledged_amt' and 'backers_count' look like end-of-campaign
# outcomes; if 'successful' is effectively "pledged >= goal", these features
# leak the target - confirm they are available at prediction time.
selected_columns = ['backers_count', 'pledged_amt', 'goal', 'state']
data = data[selected_columns].copy()

In [168]:
# Encoding target variable ('state') as binary: 'successful' = 1, others = 0
# (missing values compare unequal to 'successful' and therefore become 0).
#
# The dtype guard keeps this cell safe to re-run: once 'state' already holds
# integers, a second pass would compare 0/1 against 'successful' and silently
# turn every label into 0.
# `assign` builds a new frame instead of writing into `data` in place, which
# also avoids SettingWithCopyWarning on the column-subset frame.
if not pd.api.types.is_integer_dtype(data['state']):
    data = data.assign(state=data['state'].eq('successful').astype('int64'))


In [170]:
# Separate the label column from the predictor columns
target_column = 'state'
y = data[target_column]
X = data.drop(columns=[target_column])


In [172]:
# Splitting into training and test sets BEFORE scaling, so the held-out rows
# never contribute to the scaler's mean/variance (fitting the scaler on the
# full dataset leaks test-set statistics into preprocessing).
# Same test_size / random_state / stratify as before, so the row assignment
# to train and test is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardizing numerical features: learn the statistics from the training
# split only, then apply the same fitted transform to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for backward compatibility with any code that still reads `X_scaled`:
# the full feature matrix transformed with the train-fitted scaler.
# Do not use it for fitting or model selection.
X_scaled = scaler.transform(X)


In [176]:
# Feature selection: keep the 2 features with the highest ANOVA F-score.
# The selector is fitted on the training split only and then reused unchanged
# on the test split.
selector = SelectKBest(score_func=f_classif, k=2)
selector.fit(X_train, y_train)
X_train_reduced = selector.transform(X_train)
X_test_reduced = selector.transform(X_test)

In [178]:
# Fit a logistic-regression classifier on the selected training features
logistic_model = LogisticRegression(
    C=0.03,
    max_iter=50,
    random_state=42,
)
logistic_model.fit(X_train_reduced, y_train)

In [197]:
# Score the held-out split with the fitted classifier
y_pred = logistic_model.predict(X_test_reduced)

# Overall accuracy, shown as a percentage
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy  * 100:.2f}%")

# Per-class precision, recall, and F1-score
print("\nClassification Report:")
report_text = classification_report(y_test, y_pred)
print(report_text)



Accuracy: 84.57%

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.91      0.83     14628
           1       0.93      0.80      0.86     21647

    accuracy                           0.85     36275
   macro avg       0.84      0.86      0.84     36275
weighted avg       0.86      0.85      0.85     36275



In [202]:
# Persist the fitted classifier to disk with joblib
# NOTE(review): only the classifier is written here; the fitted `scaler` and
# `selector` would also be needed to score raw rows - confirm they are
# persisted elsewhere.
model_filename = "logistic_model_b_file.pkl"
joblib.dump(value=logistic_model, filename=model_filename)
print(f"Model saved to {model_filename}")

Model saved to logistic_model_b_file.pkl
