## **import necessary libraries**

In [2]:
import pandas as pd 
import numpy as np 
import calendar
import plotly.express as px 
import plotly.figure_factory as ff 
import plotly.graph_objects as go 
from plotly.subplots import make_subplots

## **Load dataset**

In [3]:
# Read the Billboard chart CSV into the working DataFrame.
df = pd.read_csv(filepath_or_buffer="billboard_complete.csv")

#### **variables**

In [6]:
# Two-colour palette used by the charts that are coloured by `explicit`.
sequence = ['hotpink', 'rebeccapurple']

# Parse the chart date once so the .dt accessor can be used below.
df["chart_week"] = pd.to_datetime(df["chart_week"])

# Derive calendar parts. month_name() returns the English month name directly
# (no per-row Python lambda) and yields NaN for missing dates instead of raising.
df["Month"] = df["chart_week"].dt.month_name()
df["Year"] = df["chart_week"].dt.year

# dropping inaccurate rows (anything dated 1976 or 1977)
df = df[~df['Year'].isin([1976, 1977])]

In [4]:
# List the column labels of the working DataFrame.
# NOTE(review): the saved output below lacks Month/Year even though the cell
# above adds them — it appears to come from an earlier run (In [4] vs In [6]);
# re-run the notebook top to bottom to refresh it.
df.columns

Index(['chart_week', 'current_week', 'title', 'performer', 'last_week',
       'peak_pos', 'wks_on_chart', 'genre(s)', 'explicit', 'duration-ms',
       'tempo', 'loudness', 'energy', 'danceability'],
      dtype='object')

### **visualization of tempo distribution**

In [5]:
# Box plot of the tempo column over every row of the DataFrame.
px.box(
    df,
    x='tempo',
)

### **when is rap in the top 10**

In [7]:
# Rows whose genre string contains the literal text "rap" (case-insensitive).
# na=False already treats missing genres as non-matches, so no fillna is needed
# (the previous fillna call discarded its result and had no effect).
x = df[df['genre(s)'].str.contains('rap', case=False, regex=False, na=False)]
# Keep only entries at chart position 10 or better.
xx = x[x['current_week'] <= 10]
# Number of such entries per month, most frequent first.
xx['Month'].value_counts().to_frame()


Unnamed: 0_level_0,count
Month,Unnamed: 1_level_1
May,87
July,87
April,85
June,76
October,75
August,74
September,70
March,66
February,59
January,53


In [8]:
# Count of top-10 rap entries per month. category_orders pins the bars to
# calendar order instead of the order in which months happen to appear in the data.
px.histogram(
    data_frame=xx,
    x='Month',
    text_auto=True,
    category_orders={'Month': list(calendar.month_name)[1:]},
)

### **tempo for songs that are explicit and songs that are not for each year**

In [9]:
# Mean tempo per year, restricted to entries at chart position 5 or better.
x = (
    df.loc[df["current_week"] <= 5]
    .groupby(["Year"])["tempo"]
    .mean()
    .reset_index()
)
x

Unnamed: 0,Year,tempo
0,2020,124.4923
1,2021,128.4933
2,2022,131.922517
3,2023,123.935977
4,2024,120.952004
5,2025,128.952


In [10]:
# Strip plot of tempo per year, coloured by the explicit flag.
px.strip(
    df,
    x='Year',
    y='tempo',
    color='explicit',
    color_discrete_sequence=sequence,
)

### **Danceability of songs that are explicit and songs that are not**

In [13]:
# Strip plot of danceability, coloured by the explicit flag.
px.strip(
    df,
    x='danceability',
    color='explicit',
    color_discrete_sequence=sequence,
)

### **Percentage of explicit songs for each year**

In [14]:
# Subset of rows whose chart year is 2024.
mask = df.loc[df['Year'] == 2024]


In [13]:
# Share of explicit vs. non-explicit rows in 2024.
# The year filter is applied here rather than read from the shared `mask`
# variable, which other cells reassign to different subsets; the chart must
# not depend on which cell ran last.
px.pie(
    data_frame=df[df['Year'] == 2024],
    names='explicit',
    color_discrete_sequence=sequence,
    title="Percentage of explicit tracks in 2024",
    hole=0.3,
)

### **relation between danceability and the track's chart position**

In [15]:
# Scatter of danceability against chart position for every row.
px.scatter(
    data_frame=df,
    x='current_week',
    y='danceability',
    labels={'current_week': 'Current Week Position'},
    title='Danceability vs. Chart Position',
)

### **relation between the track's loudness and its chart position (specifically in the year 2020)**

In [16]:
# Subset of rows whose chart year is 2020.
mask = df.loc[df['Year'] == 2020]

In [17]:
# Scatter of loudness against chart position for 2020 only.
# The year filter is applied here rather than read from the shared `mask`
# variable, which other cells reassign to different subsets.
px.scatter(
    df[df['Year'] == 2020],
    x='current_week',
    y='loudness',
    title='loudness vs. Chart Position',
    labels={'current_week': 'Current Week Position'},
)

### **Distribution of song durations (with a mask to ignore outliers)**

In [20]:
# Subset of rows lasting at most 500,000 ms; longer tracks are treated as outliers.
mask = df.loc[df['duration-ms'] <= 500_000]

In [21]:

# Histogram of track durations, ignoring rows longer than 500,000 ms.
# The duration filter is applied here rather than read from the shared `mask`
# variable, which other cells reassign to different subsets.
px.histogram(
    df[df['duration-ms'] <= 500000],
    x='duration-ms',
    title='Distribution of Song Durations',
    labels={'duration-ms': 'Duration (ms)'},
)


### **correlation of audio features**

In [23]:
# Numeric audio columns to compare against each other.
audio_features = df[['loudness', 'energy', 'danceability', 'tempo', 'duration-ms']]

# Pairwise correlation matrix (pandas default method).
corr = audio_features.corr()

px.imshow(
    corr,
    text_auto=True,
    title='Correlation Matrix of Audio Features',
    labels=dict(x="Feature", y="Feature"),
    # The colour scale is an argument of imshow itself; inside `labels` it was
    # treated as an unused label key and the heatmap kept the default colours.
    color_continuous_scale="inferno",
)


### **Pie charts showing the percentage of explicit songs for each year**

In [25]:


# (year, row, col) placement of each pie in the 3x2 grid, newest year first.
PIE_GRID = [
    (2025, 1, 1), (2024, 1, 2),
    (2023, 2, 1), (2022, 2, 2),
    (2021, 3, 1), (2020, 3, 2),
]

fig = make_subplots(
    rows=3,
    cols=2,
    # Pie traces need "domain"-type subplot cells.
    specs=[[{"type": "domain"}, {"type": "domain"}] for _ in range(3)],
    subplot_titles=[f"{year} Explicit Tracks" for year, _, _ in PIE_GRID],
)


def add_pie(fig, year, row, col, sequence):
    """Add a donut chart of the `explicit` split for one year to a subplot grid.

    Parameters
    ----------
    fig : plotly figure returned by ``make_subplots``; modified in place.
    year : value matched against the ``Year`` column of the global ``df``.
    row, col : 1-based grid position of the target subplot.
    sequence : list of colours used for the pie slices.
    """
    mask = df[df['Year'] == year]
    pie = px.pie(data_frame=mask, names='explicit', hole=0.3)
    fig.add_trace(pie.data[0], row=row, col=col)
    # px.pie keeps its colour sequence in the layout of its own figure, which is
    # left behind when only the trace is copied; set the pie colourway on the
    # target figure so the palette is actually applied.
    fig.update_layout(piecolorway=sequence)


for year, row, col in PIE_GRID:
    add_pie(fig, year, row, col, sequence)

# With the legend hidden, print the category on each slice so the two
# colours remain identifiable.
fig.update_traces(textinfo='label+percent')

# Update layout for spacing and title
fig.update_layout(
    height=1000,  # Adjust height to fit all subplots
    title_text="Percentage of Explicit Tracks by Year",
    showlegend=False  # Hide legend to save space
)

fig.show()