In [2]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
# Locate the data folder: <parent of the current working directory>/18752project/data
path_root = os.path.dirname(os.getcwd())
path_data1 = os.path.join(path_root, "18752project")
path_data = os.path.join(path_root, "18752project", "data")

# Full paths of the feature CSV and the label CSV
data_path, label_data_path = (
    os.path.join(path_data, csv_name)
    for csv_name in ("weather_dataset.csv", "weather_prediction_sleep_labels.csv")
)

# Read both CSV files into DataFrames
features_df = pd.read_csv(data_path)
labels_df = pd.read_csv(label_data_path)

# Join features and labels row-by-row on the shared 'DATE' column
# (pandas' default inner join: only dates present in both files are kept)
full_dataset = features_df.merge(labels_df, on='DATE')

# 'sleep_weather' is the target column; every remaining column except the
# 'DATE' join key is used as a feature
y = full_dataset['sleep_weather']
X = full_dataset.drop(columns=['sleep_weather', 'DATE'])

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



In [3]:
# Support Vector Machine with a linear kernel, fitted on the training split
# (fit() returns the estimator itself, so construction and training are chained)
svm_classifier = SVC(kernel='linear').fit(X_train, y_train)

# Predicted labels for the held-out test rows
y_pred = svm_classifier.predict(X_test)

# Score the predictions against the true test labels:
# overall accuracy plus per-class precision / recall / F1
report = classification_report(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy of SVM classifier:", accuracy)
print("Classification report:\n", report)


Accuracy of SVM classifier: 0.8768809849521204
Classification report:
               precision    recall  f1-score   support

       False       0.90      0.87      0.89       408
        True       0.85      0.88      0.86       323

    accuracy                           0.88       731
   macro avg       0.87      0.88      0.88       731
weighted avg       0.88      0.88      0.88       731



In [12]:
# K-Nearest Neighbors classifier

from sklearn.neighbors import KNeighborsClassifier

# Build a KNN model that votes over the 9 nearest training samples
# (n_neighbors=9, not the library default) and fit it on the training split;
# fit() returns the estimator itself, so the two steps are chained
knn_classifier = KNeighborsClassifier(n_neighbors=9).fit(X_train, y_train)

# Predicted labels for the held-out test rows
y_pred_knn = knn_classifier.predict(X_test)

# Score the predictions against the true test labels:
# overall accuracy plus per-class precision / recall / F1
report_knn = classification_report(y_true=y_test, y_pred=y_pred_knn)
accuracy_knn = accuracy_score(y_true=y_test, y_pred=y_pred_knn)

print("Accuracy of KNN classifier:", accuracy_knn)
print("Classification report for KNN:\n", report_knn)


Accuracy of KNN classifier: 0.8891928864569083
Classification report for KNN:
               precision    recall  f1-score   support

       False       0.93      0.87      0.90       408
        True       0.85      0.92      0.88       323

    accuracy                           0.89       731
   macro avg       0.89      0.89      0.89       731
weighted avg       0.89      0.89      0.89       731



In [13]:
from sklearn.linear_model import LogisticRegression

# Logistic Regression fitted on the training split; max_iter=1000 gives the
# solver extra iterations to converge (raise it further if a convergence
# warning appears). fit() returns the estimator, so the steps are chained.
logistic_regression_classifier = LogisticRegression(max_iter=1000).fit(
    X_train, y_train
)

# Predicted labels for the held-out test rows
y_pred_lr = logistic_regression_classifier.predict(X_test)

# Score the predictions against the true test labels:
# overall accuracy plus per-class precision / recall / F1
report_lr = classification_report(y_true=y_test, y_pred=y_pred_lr)
accuracy_lr = accuracy_score(y_true=y_test, y_pred=y_pred_lr)

print("Accuracy of Logistic Regression classifier:", accuracy_lr)
print("Classification report for Logistic Regression:\n", report_lr)


Accuracy of Logistic Regression classifier: 0.8755129958960328
Classification report for Logistic Regression:
               precision    recall  f1-score   support

       False       0.90      0.87      0.89       408
        True       0.84      0.88      0.86       323

    accuracy                           0.88       731
   macro avg       0.87      0.88      0.87       731
weighted avg       0.88      0.88      0.88       731

