# Personal Productivity Classifier
This notebook trains a logistic regression model that labels daily activities as productive, neutral, or distracting, using TF-IDF features of the activity description combined with engineered time-based features (duration, hour of day, day of week).

In [None]:
# Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.sparse import hstack
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [None]:
# Sample data creation (to be replaced with actual data)
# Five hand-written records: free-text activity name, an hourly timestamp
# starting at 2023-01-01 00:00, a duration in minutes, and the class label.
data = pd.DataFrame({
    'activity': ['Email client', 'Browse social media', 'Code feature', 'Watch YouTube', 'Team meeting'],
    # Lowercase 'h' is the hourly frequency alias; the uppercase 'H' spelling
    # is deprecated since pandas 2.2.
    'timestamp': pd.date_range(start='2023-01-01', periods=5, freq='h'),
    'duration_minutes': [30, 45, 120, 60, 90],
    'label': ['productive', 'distracting', 'productive', 'distracting', 'neutral']
})

# Feature engineering
# Derive calendar features from the timestamp: each listed `.dt` attribute
# becomes a column of the same name (hour of day, then day of week).
for time_part in ('hour', 'dayofweek'):
    data[time_part] = getattr(data['timestamp'].dt, time_part)

# Encode labels
label_map = {'productive': 0, 'neutral': 1, 'distracting': 2}
y = data['label'].map(label_map)
# Series.map silently yields NaN for any label missing from label_map;
# fail early and name the offending values instead of training on NaN targets.
unknown_labels = data.loc[y.isna(), 'label'].unique().tolist()
if unknown_labels:
    raise ValueError(f"Labels missing from label_map: {unknown_labels}")

# Train/test split
# Split row positions rather than the feature matrix, so that the TF-IDF
# vocabulary and IDF weights below can be learned from the training rows only
# (fitting the vectorizer on every row would leak test text into the features).
train_idx, test_idx = train_test_split(
    np.arange(len(data)), test_size=0.2, random_state=42
)

# TF-IDF on activity text
vectorizer = TfidfVectorizer()
vectorizer.fit(data['activity'].iloc[train_idx])
# Transform every row with the train-fitted vocabulary; words that only occur
# in test rows are ignored.
X_text = vectorizer.transform(data['activity'])

# Combine features
# NOTE(review): the numeric columns are stacked unscaled next to TF-IDF values
# in [0, 1]; consider standardising them with statistics from the training rows.
X_time = data[['duration_minutes', 'hour', 'dayofweek']].values
# CSR output (instead of hstack's default COO) supports the row indexing below.
X = hstack([X_text, X_time], format='csr')

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

# Logistic Regression model
# Build the classifier (max_iter=1000) and fit it on the training split in one
# expression; `fit` returns the estimator itself.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Evaluation
y_pred = model.predict(X_test)

# Pin the full class list (in label_map order) so the report rows and the
# confusion-matrix axes always cover every class, even when the test split
# happens to contain only some of them.
class_names = list(label_map)
class_ids = [label_map[name] for name in class_names]

# zero_division=0 reports 0 for classes with no true/predicted samples instead
# of emitting an undefined-metric warning.
print(classification_report(
    y_test, y_pred, labels=class_ids, target_names=class_names, zero_division=0
))

fig, ax = plt.subplots()
sns.heatmap(
    confusion_matrix(y_test, y_pred, labels=class_ids),
    annot=True, fmt='d', cmap='Blues',
    xticklabels=class_names, yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()