# In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt


# Load the dataset from the notebook's working storage.
data = pd.read_csv('/content/dataset.csv')

# The two columns used as clustering features.
location_data = data[['log_latitude_range', 'log_longitude_range']]

# Pin random_state so cluster ids (and therefore the colours and the
# "Cluster N" titles drawn later) are reproducible from run to run.
# n_init is set explicitly because scikit-learn's default for it differs
# between releases; 10 restarts keeps the best of several initialisations.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)

# fit_predict fits the model and returns one label per row of location_data,
# in row order, so the labels line up with `data`.
cluster_labels = kmeans.fit_predict(location_data)


# Scatter the two clustering features against each other, one point per row,
# coloured by the cluster label assigned to that row.
lat_range = data['log_latitude_range']
lon_range = data['log_longitude_range']
plt.scatter(
    x=lat_range,
    y=lon_range,
    c=cluster_labels,
    cmap='viridis',
)

# Titles and axis labels.
plt.title('K-means Clustering of User Locations')
plt.xlabel('Log Latitude Range')
plt.ylabel('Log Longitude Range')

# Colour scale mapping point colour back to cluster label.
plt.colorbar(label='Cluster')
plt.show()


# Attach each row's cluster label to a copy so `data` itself is not modified.
clustered_data = data.copy()
clustered_data['Cluster'] = cluster_labels

# Keep only rows whose activity is one of the listed ones, then compute, per
# cluster, each activity's share of that cluster's retained rows. After
# unstack() the frame has one row per cluster and one column per activity;
# activities absent from a cluster are filled with 0.
specified_activities = ['Sitting', 'Lying_down', 'Fix_running', 'Fix_walking', 'OR_standing']
clustered_activities = clustered_data[clustered_data['Activity'].isin(specified_activities)]
activity_counts = (
    clustered_activities.groupby('Cluster')['Activity']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)
)

# Only clusters that contain at least one of the specified activities have a
# row here, so size the figure from the table rather than assuming 3 panels.
n_pies = len(activity_counts)

if n_pies == 0:
    # plt.subplots cannot build a grid with zero columns; report and skip.
    print('No rows match the specified activities; nothing to plot.')
else:
    # squeeze=False always returns a 2-D axes array, so a single pie is handled
    # the same way as several. 5 inches per panel gives 15x5 for three clusters.
    fig, axes = plt.subplots(1, n_pies, figsize=(5 * n_pies, 5), squeeze=False)

    # One pie per cluster row; wedge labels are the activity names.
    for ax, (cluster_label, activity_percentages) in zip(axes[0], activity_counts.iterrows()):
        ax.pie(activity_percentages, labels=activity_percentages.index, autopct='%1.1f%%', startangle=140)
        ax.set_title(f'Cluster {cluster_label}')

    plt.tight_layout()
    plt.show()
