In [4]:
from google.colab import drive
drive.mount('https://drive.google.com/file/d/12u6rzKc8OuOVuRzPDvqrXUud4iDJbysH/view?usp=drive_link')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Load the dataset
df = pd.read_csv('spotify-2023.csv')

# Display the first 10 rows
print(df.head(10))

# Check column names
print(df.columns)

# Display info
print(df.info())

# Fill NA values and drop others
df['Likes'].fillna(0, inplace=True)
df['Comments'].fillna(0, inplace=True)
df.dropna(inplace=True)

# Describe the cleaned data
print(df.describe())

# Explore album type
print(df['Album_type'].value_counts())

# Create a pie chart for Album Types
plt.figure(figsize=(8, 8))
df['Album_type'].value_counts().plot(kind='pie', autopct='%1.1f%%')
plt.title('Album Types')
plt.show()

# Analyze songs by artist and views-youtube, stream-spotify
top_artists_youtube = df.groupby('Artist')['Views'].sum().nlargest(10)
top_artists_spotify = df.groupby('Artist')['Stream'].sum().nlargest(10)

# Bar charts for Top 10 Artists on Youtube and Spotify
plt.figure(figsize=(16, 6))
plt.subplot(1, 2, 1)
top_artists_youtube.plot(kind='bar')
plt.title('Top 10 Artists on Youtube')
plt.ylabel('Total Views')

plt.subplot(1, 2, 2)
top_artists_spotify.plot(kind='bar')
plt.title('Top 10 Artists on Spotify')
plt.ylabel('Total Streams')
plt.show()

# Analyze top 10 songs
top_songs_views = df.nlargest(10, 'Views')[['Track', 'Views']]
top_songs_comments = df.nlargest(10, 'Comments')[['Track', 'Comments']]
top_songs_likes = df.nlargest(10, 'Likes')[['Track', 'Likes']]

# Display top 10 songs
print("Top 10 Songs with Most Views:")
print(top_songs_views)
print("\nTop 10 Songs with Most Comments:")
print(top_songs_comments)
print("\nTop 10 Songs with Most Likes:")
print(top_songs_likes)

# Analyze Energy, Danceability, and Acousticness
energy_dance_acoustic = df[['Track', 'Energy', 'Danceability', 'Acousticness']]

# Bar chart of the Top 10 Songs by Stream with Energy, Danceability, and Acousticness
top_songs_stream = df.nlargest(10, 'Stream')
plt.figure(figsize=(10, 6))
sns.barplot(x='Track', y='Stream', data=top_songs_stream)
plt.title('Top 10 Songs by Stream with Energy, Danceability, and Acousticness')
plt.xticks(rotation=45)
plt.ylabel('Stream')
plt.show()

# Top 10 artists with more likes
top_artists_likes = df.groupby('Artist')['Likes'].sum().nlargest(10)

# Bar chart with Number of Likes per Artist
plt.figure(figsize=(10, 6))
top_artists_likes.plot(kind='bar')
plt.title('Number of Likes per Artist')
plt.ylabel('Total Likes')
plt.show()

# Explore single releases by artists
single_releases = df[df['Album_type'] == 'single'].groupby('Artist')[['Views', 'Likes']].sum()

# Display table for single releases
print("Single Releases by Artists:")
print(single_releases)

# Explore compilation releases by artists
compilation_releases = df[df['Album_type'] == 'compilation'].groupby('Artist')[['Views', 'Likes']].sum()

# Display table for compilation releases
print("Compilation Releases by Artists:")
print(compilation_releases)

# Top 10 channels based on views
top_channels = df.groupby('Channel')['Views'].sum().nlargest(10)

# Bar chart of top channels
plt.figure(figsize=(10, 6))
top_channels.plot(kind='bar')
plt.title('Top 10 Channels based on Views')
plt.ylabel('Total Views')
plt.show()

ValueError: Mountpoint must be in a directory that exists