In [40]:
import pandas as pd

# Load the cleaned app-usage log. The columns used below are
# 'user_id', 'timestamp' and 'app_name'.
df = pd.read_csv("lsapp_processed.csv")

# Order rows by user, then by time, so that consecutive rows within a user
# are consecutive app events.
# NOTE(review): 'timestamp' is sorted exactly as read_csv returned it (it is not
# parsed to datetime here) - confirm its stored format sorts chronologically.
df = df.sort_values(['user_id', 'timestamp'])

# 'next_app' = the app the same user used on the following row.
# It is NaN on each user's last row, so sequences never cross users.
df['next_app'] = df.groupby('user_id')['app_name'].shift(-1)

# Full transition-count matrix: rows = current app, columns = next app.
# Rows whose next_app is NaN are excluded by groupby, so an app that is only
# ever a user's final event has no row here.
transitions = df.groupby(['app_name', 'next_app']).size().unstack(fill_value=0)

# Total appearances of each app, as current app plus as next app.
# Series.add(..., fill_value=0) keeps apps that occur in only one of the two
# counts; a plain `+` would turn those into NaN and lose them.
app_usage_counts = (
    df['app_name'].value_counts()
    .add(df['next_app'].value_counts(), fill_value=0)
    .astype(int)
)

# The 20 apps with the highest total counts
top_20_apps = app_usage_counts.sort_values(ascending=False).head(20).index

# Restrict the matrix to the top apps on both axes. reindex (rather than .loc)
# inserts an all-zero row/column for a top app that never occurs as a
# source/target, instead of raising KeyError.
filtered_transitions = transitions.reindex(
    index=top_20_apps, columns=top_20_apps, fill_value=0
)

# Save the matrix as a CSV file
filtered_transitions.to_csv('top20_app_transitions.csv')
print("Saved CSV: top20_app_transitions.csv")

Saved CSV: top20_app_transitions.csv


In [41]:
def _most_common(values):
    """Return the most frequent non-null entry of `values`, or None if there is none."""
    modes = values.mode()  # computed once per group; mode() skips NaN by default
    # When several values tie, mode() returns all of them; take the first.
    return modes.iloc[0] if not modes.empty else None


# For every app, the app that most often comes next for the same user
next_app_mode = df.groupby('app_name')['next_app'].agg(_most_common)
print("Most likely next app after each app:")
print(next_app_mode)

Most likely next app after each app:
app_name
amazon shopping                     google
android in call ui                contacts
aol                     facebook messenger
army men strike            army men strike
badoo                   whatsapp messenger
                               ...        
wechat                               faceu
whatsapp messenger                  google
words with friends 2         google chrome
yahoo mail                   google chrome
youtube                             google
Name: next_app, Length: 87, dtype: object


In [42]:
# Count rows per app (most frequent first) and keep the five largest counts.
usage_by_app = df["app_name"].value_counts()
top5 = usage_by_app.iloc[:5]

print("Top 5 most used apps:", top5, sep="\n")

Top 5 most used apps:
app_name
google                74824
google chrome         74126
facebook messenger    45231
facebook              44397
gmail                 29547
Name: count, dtype: int64


In [43]:
# Number of distinct apps each user has at least one row for
apps_per_user = df.groupby('user_id')['app_name'].nunique()

# Two-column table (user_id, unique_apps_used), written without the row index
apps_per_user_df = apps_per_user.reset_index(name='unique_apps_used')
apps_per_user_df.to_csv('apps_per_user_distribution.csv', index=False)

print("CSV saved: apps_per_user_distribution.csv")

CSV saved: apps_per_user_distribution.csv
