# Titanic Dataset Analysis

In [16]:
#Import necessary libraries for data manipulation and machine learning
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

#Load the Titanic dataset from a CSV file
titanic_data = pd.read_csv (r'C:\Users\Vishal V P\Downloads\tested.csv')

#Handle missing values in the 'Age'column by filling them with median age
titanic_data ['Age'] = titanic_data ['Age'].fillna (titanic_data ['Age'].median ())

#Handling missing values in the 'Embarked' column by filling them with the most frequent value (mode)
titanic_data ['Embarked'] = titanic_data ['Embarked'].fillna (titanic_data ['Embarked'].mode () [0])

#Convert categorical variables into numerical variables by using one-hot encoding
titanic_data = pd.get_dummies (titanic_data, columns = ['Sex', 'Embarked'])

# Drop the 'Name' column as it is not necessary for modeling
titanic_data = titanic_data.drop ('Name', axis = 1)

# Drop any other columns that contain string values to ensure all data is numerical
titanic_data = titanic_data.select_dtypes (include = ['int64', 'float64'])

#Split the data into features (X) and the target variables (Y)
X = titanic_data.drop ('Survived', axis = 1)
y = titanic_data ['Survived']

#Split the data into training and testing sets for model evaluation 
X_train, X_test, y_train, y_test = train_test_split (X, y, test_size = 0.2, random_state = 42)

# Create a pipeline with an imputer and a logistic regression model
pipeline = Pipeline([
    ('imputer', SimpleImputer (strategy = 'mean')),
    ('model', LogisticRegression ())
])

# Fit the pipeline to the training data
pipeline.fit (X_train, y_train)

#Use the pipeline to make predictions on the testing data
y_pred = pipeline.predict (X_test)

# Evaluate the accuracy of the model
print ('Accuracy:', accuracy_score (y_test, y_pred))


Accuracy: 0.6190476190476191


Accuracy: 0.6190476190476191