
# **SpaceX Falcon 9 First Stage Landing Prediction**
## Assignment: Machine Learning Prediction

In this notebook, you will create a machine learning pipeline to predict if the Falcon 9 first stage will land successfully.
The pipeline will use data from the preceding labs.


In [None]:

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0321EN-SpaceX/labs/module_3/spacex_launch_data.csv"
df = pd.read_csv(url)

# Data preprocessing
feature_columns = ['FlightNumber', 'PayloadMass', 'Orbit', 'LaunchSite', 'Outcome', 'Flights', 'GridFins', 'Reused', 'Legs', 'LandingOutcome']
# NOTE(review): 'Outcome' and 'LandingOutcome' look like they describe the landing
# result itself; if 'Class' is derived from them they leak the target and should be
# removed from feature_columns -- confirm against the dataset description.

# scikit-learn estimators need numeric input, so one-hot encode every text-valued
# column (get_dummies expands object/string/category columns and passes numeric and
# boolean columns through unchanged).
X = pd.get_dummies(df[feature_columns], dtype=float)
Y = df['Class']  # Class is the target variable

# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize the features so that large-valued columns do not dominate the
# distance- and margin-based models (KNN, SVM, logistic regression).
# The scaler is fitted on the training split only, so no test-set statistics
# leak into training.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns, index=X_test.index)

# Display the first few rows of the dataset
df.head()


In [None]:

# TASK 10: Tune a k-nearest-neighbours classifier with cross-validated grid search

# Candidate neighbourhood sizes: k = 1 .. 9
param_grid = {'n_neighbors': np.arange(1, 10)}

# Base estimator; every other KNN setting stays at its scikit-learn default
knn = KNeighborsClassifier()

# 5-fold cross-validation over the grid, fitted on the training split only
# (fit() returns the fitted search object itself)
knn_cv = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5).fit(X_train, Y_train)

# Report the winning k and its mean cross-validated score
print("Tuned hyperparameters (best parameters): ", knn_cv.best_params_)
print("Best cross-validated accuracy: ", knn_cv.best_score_)


In [None]:

# TASK 11: Score the tuned KNN model on the held-out test split

# Predictions from the best estimator found by the grid search
yhat = knn_cv.predict(X_test)

test_accuracy = knn_cv.score(X_test, Y_test)
print("Test set accuracy: ", test_accuracy)

# Rows of the matrix are the actual classes, columns the predicted classes
cm = confusion_matrix(Y_test, yhat)

# Draw the confusion matrix as an annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title("Confusion Matrix for KNN Model")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()


In [None]:

# TASK 12: Compare the performance of different classifiers
# Every model is trained on the same training split and scored on the same
# held-out test split, so the accuracies are directly comparable.

# Logistic Regression
lr = LogisticRegression()
lr.fit(X_train, Y_train)
lr_accuracy = lr.score(X_test, Y_test)

# Support Vector Machine
svc = SVC()
svc.fit(X_train, Y_train)
svc_accuracy = svc.score(X_test, Y_test)

# Decision Tree -- seeded so the reported accuracy is reproducible across runs
# (same seed as the train/test split)
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, Y_train)
dt_accuracy = dt.score(X_test, Y_test)

# Tuned KNN from TASK 10 -- included so the comparison covers every model in the notebook
knn_accuracy = knn_cv.score(X_test, Y_test)

# Collect the accuracies in one place; insertion order is also the tie-break order below
accuracies = {
    "Logistic Regression": lr_accuracy,
    "SVM": svc_accuracy,
    "Decision Tree": dt_accuracy,
    "KNN": knn_accuracy,
}

# Display the accuracy for each model
for method_name, accuracy in accuracies.items():
    print(f"{method_name} accuracy: {accuracy}")

# Find the best performing method; best_method is an (accuracy, name) tuple and
# max() keeps the first entry on ties
best_method = max(
    ((accuracy, method_name) for method_name, accuracy in accuracies.items()),
    key=lambda pair: pair[0],
)
print(f"Best performing method: {best_method[1]} with accuracy {best_method[0]}")
