In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder


In [None]:
# Upload the CSV file from the local machine (Colab-only helper)
from google.colab import files
uploaded = files.upload()

# After uploading, load the dataset
import pandas as pd
import io

# `uploaded` maps each saved file name to its raw bytes. Colab can save a
# re-uploaded file under a different name (e.g. "buffer_data (1).csv"), so a
# hard-coded key may raise KeyError. Prefer the expected name and otherwise
# fall back to the first file that was uploaded.
if not uploaded:
    raise ValueError("No file was uploaded; please upload buffer_data.csv")
uploaded_name = 'buffer_data.csv' if 'buffer_data.csv' in uploaded else next(iter(uploaded))

# Parse the uploaded bytes directly, without going through the file system
data = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))


Saving buffer_data.csv to buffer_data.csv


In [None]:
# Read the dataset from the CSV file in the working directory
data = pd.read_csv('buffer_data.csv')

# Preview the first five rows to see the columns and how values are formatted
print(data.head(5))


                        Name  Gender Neurodiversity        Date  \
0  Miss Brianna Benjamin DDS  Female       Dyslexia  30-08-2024   
1              Brandy Taylor    Male           ADHD  08-06-2024   
2                  Carl Carr  Female       Dyslexia  27-02-2024   
3               Haley Powers  Female           ADHD  18-03-2024   
4               Joshua Davis  Female           ADHD  27-08-2024   

                         Email  Time Spent on Media  Productive Time Spent  \
0  andersonmichael@example.org                  206                    203   
1     karenwoodard@example.net                  436                    121   
2  hollandmichelle@example.net                  598                    480   
3         robert62@example.com                  295                    186   
4      maryrussell@example.com                  346                    444   

   Score  
0     10  
1     60  
2     40  
3     70  
4     40  


In [None]:
def adjust_scores(row, block_size=10, media_penalty_per_block=25,
                  productive_bonus_per_block=100):
    """Return the score adjusted for media time and productive time.

    Every full ``block_size`` units of 'Time Spent on Media' subtracts
    ``media_penalty_per_block`` points, and every full ``block_size`` units
    of 'Productive Time Spent' adds ``productive_bonus_per_block`` points.
    Partial blocks are ignored (floor division).

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the keys 'Score', 'Time Spent on Media' and
        'Productive Time Spent' (e.g. a dict or a DataFrame row).
    block_size : int, default 10
        Size of one time block, in the same unit as the time columns.
    media_penalty_per_block : int, default 25
        Points deducted per full block of media time.
    productive_bonus_per_block : int, default 100
        Points added per full block of productive time.

    Returns
    -------
    The adjusted score: ``Score - media penalty + productive bonus``.
    """
    # Floor division counts only completed blocks
    media_blocks = row['Time Spent on Media'] // block_size
    productive_blocks = row['Productive Time Spent'] // block_size

    media_penalty = media_blocks * media_penalty_per_block
    productive_bonus = productive_blocks * productive_bonus_per_block
    return row['Score'] - media_penalty + productive_bonus

# Apply the score adjustment to every record.
# adjust_scores only performs column lookups and arithmetic, so it can be
# called once on the whole DataFrame. This gives the same values as
# data.apply(adjust_scores, axis=1) without a Python-level call per row.
# NOTE: if adjust_scores ever gains per-row branching (e.g. `if row[...] > x`),
# switch back to the row-wise apply.
data['Adjusted Score'] = adjust_scores(data)


In [None]:
# Encode categorical variables as integer codes.
# Each column gets its own fitted LabelEncoder. Re-fitting one shared encoder
# would overwrite the Gender classes when Neurodiversity is fitted, so Gender
# codes could no longer be mapped back to their labels, e.g. with
# label_encoders['Gender'].inverse_transform(...).
label_encoders = {}
for column in ('Gender', 'Neurodiversity'):
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Kept for backward compatibility: as before, `labelencoder` ends up being the
# encoder that was fitted on Neurodiversity.
labelencoder = label_encoders['Neurodiversity']


In [None]:
import warnings

# Parse the 'Date' strings (day-month-year, e.g. 30-08-2024) into datetimes
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')

# ISO week number of each record (informational; not used by the steps below)
data['Week'] = data['Date'].dt.isocalendar().week

# Order each student's records chronologically so that diff() compares a
# record with the same student's previous record
data = data.sort_values(by=['Name', 'Date'])

# Change in adjusted score relative to the same student's previous record.
# This is a record-to-record change, not a weekly aggregate. A student's first
# record has no predecessor, so its change is NaN.
data['Score Change'] = data.groupby('Name')['Adjusted Score'].diff()

# Binary outcome: 1 when the adjusted score increased, 0 otherwise.
# `NaN > 0` is False, so rows without an earlier record (and rows with no
# change) are labelled 0 as well.
data['Condition'] = np.where(data['Score Change'] > 0, 1, 0)

# Make the degenerate case visible instead of silently training on it: if no
# student has an improving record, every label is 0 and any classifier will
# trivially score 100% accuracy.
if data['Condition'].nunique() < 2:
    rows_without_baseline = int(data['Score Change'].isna().sum())
    warnings.warn(
        f"'Condition' contains a single class; {rows_without_baseline} of "
        f"{len(data)} rows have no earlier record for the same Name and were "
        "labelled 0. A model trained on this target is not meaningful."
    )


In [None]:
# Model inputs: the encoded Gender/Neurodiversity codes, the two time columns
# and the adjusted score
X = data.loc[:, [
    'Gender',
    'Neurodiversity',
    'Time Spent on Media',
    'Productive Time Spent',
    'Adjusted Score',
]]

# Target: the binary 'Condition' label
y = data['Condition']

# Hold out 25% of the rows for testing; the fixed random_state makes the
# split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.25,
)


In [None]:
# Metrics used to score the classifier on the held-out split
from sklearn.metrics import accuracy_score, confusion_matrix

# Fit a 5-nearest-neighbours classifier on the training split
# (fit() returns the estimator itself, so `knn` is the fitted model)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predicted Condition for every row of the test split
y_pred = knn.predict(X_test)

# Report overall accuracy and the confusion matrix for the test split
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 1.0
Confusion Matrix:
 [[25]]


In [None]:
# Metrics used for the detailed evaluation below
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Score the held-out rows with the fitted classifier
y_pred = knn.predict(X_test)

# Show the predictions and the ground truth one after the other
print("Predicted Conditions (0: Deteriorating, 1: Improving):", y_pred, sep="\n")
print("\nActual Conditions (0: Deteriorating, 1: Improving):", y_test.values, sep="\n")

# Share of test rows predicted correctly, reported as a percentage
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy of the KNN model: {accuracy * 100:.2f}%")

# More detailed metrics: confusion matrix, then per-class precision/recall/F1
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")


Predicted Conditions (0: Deteriorating, 1: Improving):
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Actual Conditions (0: Deteriorating, 1: Improving):
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Accuracy of the KNN model: 100.00%

Confusion Matrix:
[[25]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        25

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

